1
0
mirror, last synced 2024-11-22 09:14:23 +01:00

Add support for inplace cleaning

This commit is contained in:
jvoisin 2019-08-31 10:31:08 -07:00
parent d76a6cbb18
commit 40669186c9
2 changed files with 46 additions and 6 deletions

17
mat2
View File

@ -53,6 +53,8 @@ def create_arg_parser() -> argparse.ArgumentParser:
help='how to handle unknown members of archive-style ' help='how to handle unknown members of archive-style '
'files (policy should be one of: %s) [Default: abort]' % 'files (policy should be one of: %s) [Default: abort]' %
', '.join(p.value for p in UnknownMemberPolicy)) ', '.join(p.value for p in UnknownMemberPolicy))
parser.add_argument('--inplace', action='store_true',
help='clean in place, without backup')
excl_group = parser.add_mutually_exclusive_group() excl_group = parser.add_mutually_exclusive_group()
excl_group.add_argument('files', nargs='*', help='the files to process', excl_group.add_argument('files', nargs='*', help='the files to process',
@ -114,8 +116,10 @@ def __print_meta(filename: str, metadata: dict, depth: int = 1):
print(padding + " %s: harmful content" % k) print(padding + " %s: harmful content" % k)
def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy) -> bool: def clean_meta(filename: str, is_lightweight: bool, inplace: bool,
if not __check_file(filename, os.R_OK): policy: UnknownMemberPolicy) -> bool:
mode = (os.R_OK | os.W_OK) if inplace else os.R_OK
if not __check_file(filename, mode):
return False return False
p, mtype = parser_factory.get_parser(filename) # type: ignore p, mtype = parser_factory.get_parser(filename) # type: ignore
@ -127,7 +131,10 @@ def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy)
try: try:
logging.debug('Cleaning %s…', filename) logging.debug('Cleaning %s…', filename)
return p.remove_all() ret = p.remove_all()
if inplace is True:
os.rename(p.output_filename, filename)
return ret
except RuntimeError as e: except RuntimeError as e:
print("[-] %s can't be cleaned: %s" % (filename, e)) print("[-] %s can't be cleaned: %s" % (filename, e))
return False return False
@ -190,6 +197,7 @@ def main() -> int:
return 0 return 0
else: else:
inplace = args.inplace
policy = UnknownMemberPolicy(args.unknown_members) policy = UnknownMemberPolicy(args.unknown_members)
if policy == UnknownMemberPolicy.KEEP: if policy == UnknownMemberPolicy.KEEP:
logging.warning('Keeping unknown member files may leak metadata in the resulting file!') logging.warning('Keeping unknown member files may leak metadata in the resulting file!')
@ -201,7 +209,8 @@ def main() -> int:
with concurrent.futures.ProcessPoolExecutor() as executor: with concurrent.futures.ProcessPoolExecutor() as executor:
futures = list() futures = list()
for f in files: for f in files:
future = executor.submit(clean_meta, f, args.lightweight, policy) future = executor.submit(clean_meta, f, args.lightweight,
inplace, policy)
futures.append(future) futures.append(future)
for future in concurrent.futures.as_completed(futures): for future in concurrent.futures.as_completed(futures):
no_failure &= future.result() no_failure &= future.result()

View File

@ -20,7 +20,7 @@ class TestHelp(unittest.TestCase):
def test_help(self): def test_help(self):
proc = subprocess.Popen(mat2_binary + ['--help'], stdout=subprocess.PIPE) proc = subprocess.Popen(mat2_binary + ['--help'], stdout=subprocess.PIPE)
stdout, _ = proc.communicate() stdout, _ = proc.communicate()
self.assertIn(b'mat2 [-h] [-V] [--unknown-members policy] [-v] [-l]', self.assertIn(b'mat2 [-h] [-V] [--unknown-members policy] [--inplace] [-v] [-l]',
stdout) stdout)
self.assertIn(b'[--check-dependencies] [-L | -s]', stdout) self.assertIn(b'[--check-dependencies] [-L | -s]', stdout)
self.assertIn(b'[files [files ...]]', stdout) self.assertIn(b'[files [files ...]]', stdout)
@ -28,7 +28,7 @@ class TestHelp(unittest.TestCase):
def test_no_arg(self): def test_no_arg(self):
proc = subprocess.Popen(mat2_binary, stdout=subprocess.PIPE) proc = subprocess.Popen(mat2_binary, stdout=subprocess.PIPE)
stdout, _ = proc.communicate() stdout, _ = proc.communicate()
self.assertIn(b'mat2 [-h] [-V] [--unknown-members policy] [-v] [-l]', self.assertIn(b'mat2 [-h] [-V] [--unknown-members policy] [--inplace] [-v] [-l]',
stdout) stdout)
self.assertIn(b'[--check-dependencies] [-L | -s]', stdout) self.assertIn(b'[--check-dependencies] [-L | -s]', stdout)
self.assertIn(b'[files [files ...]]', stdout) self.assertIn(b'[files [files ...]]', stdout)
@ -241,3 +241,34 @@ class TestCommandLineParallel(unittest.TestCase):
os.remove('./tests/data/dirty_%d.cleaned.jpg' % i) os.remove('./tests/data/dirty_%d.cleaned.jpg' % i)
os.remove(path) os.remove(path)
os.remove('./tests/data/dirty_%d.docx' % i) os.remove('./tests/data/dirty_%d.docx' % i)
class TestInplaceCleaning(unittest.TestCase):
    """Command-line tests for the `--inplace` option of mat2."""

    def test_cleaning(self):
        """Clean one JPEG in place, then check `--show` reports no metadata."""
        target = './tests/data/clean.jpg'
        shutil.copy('./tests/data/dirty.jpg', target)
        # Registered immediately so the fixture is removed even when an
        # assertion below fails.
        self.addCleanup(os.remove, target)

        proc = subprocess.Popen(mat2_binary + ['--inplace', target],
                                stdout=subprocess.PIPE)
        proc.communicate()

        proc = subprocess.Popen(mat2_binary + ['--show', target],
                                stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        self.assertIn(b' No metadata found in ./tests/data/clean.jpg.\n', stdout)

    def test_cleaning_multiple_one_fails(self):
        """Clean several files in place when one of them is not a real JPEG.

        Nine valid JPEGs and one torrent file carrying a `.jpg` name are
        handed to a single `mat2 --inplace` invocation; the valid files must
        still end up without metadata.
        """
        good_files = ['./tests/data/clean_%d.jpg' % i for i in range(9)]
        bad_file = './tests/data/clean_9.jpg'

        for path in good_files:
            shutil.copy('./tests/data/dirty.jpg', path)
            self.addCleanup(os.remove, path)
        shutil.copy('./tests/data/dirty.torrent', bad_file)
        self.addCleanup(os.remove, bad_file)

        # The bad file has to be on the command line, otherwise nothing in
        # this run can fail and the scenario named by the test is not covered.
        # NOTE(review): presumably mat2 rejects the torrent-as-jpg and leaves
        # it untouched -- confirm against clean_meta()/parser_factory.
        proc = subprocess.Popen(mat2_binary + ['--inplace'] + good_files + [bad_file],
                                stdout=subprocess.PIPE)
        proc.communicate()

        for path in good_files:
            meta = images.JPGParser(path).get_meta()
            self.assertEqual(meta, {})