Add some blank lines to make the code more PEP 8 compliant
This commit is contained in:
parent
9fa76c4c20
commit
0239ab3b6a
5
main.py
5
main.py
@ -16,6 +16,7 @@ def __check_file(filename:str, mode:int = os.R_OK) -> bool:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def create_arg_parser():
|
||||
parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
|
||||
parser.add_argument('files', nargs='*')
|
||||
@ -29,6 +30,7 @@ def create_arg_parser():
|
||||
help='list all the harmful metadata of a file without removing them')
|
||||
return parser
|
||||
|
||||
|
||||
def show_meta(filename:str):
|
||||
if not __check_file(filename):
|
||||
return
|
||||
@ -44,6 +46,7 @@ def show_meta(filename:str):
|
||||
except UnicodeEncodeError:
|
||||
print(" %s: harmful content" % k)
|
||||
|
||||
|
||||
def clean_meta(filename:str):
|
||||
if not __check_file(filename, os.R_OK|os.W_OK):
|
||||
return
|
||||
@ -54,6 +57,7 @@ def clean_meta(filename:str):
|
||||
return
|
||||
p.remove_all()
|
||||
|
||||
|
||||
def show_parsers():
|
||||
print('[+] Supported formats:')
|
||||
for parser in parser_factory._get_parsers():
|
||||
@ -61,6 +65,7 @@ def show_parsers():
|
||||
extensions = ', '.join(mimetypes.guess_all_extensions(mtype))
|
||||
print(' - %s (%s)' % (mtype, extensions))
|
||||
|
||||
|
||||
def __get_files_recursively(files):
|
||||
for f in files:
|
||||
if os.path.isfile(f):
|
||||
|
@ -1,5 +1,6 @@
|
||||
import abc
|
||||
|
||||
|
||||
class AbstractParser(abc.ABC):
|
||||
meta_list = set()
|
||||
mimetypes = set()
|
||||
|
@ -4,6 +4,7 @@ import mutagen
|
||||
|
||||
from . import abstract
|
||||
|
||||
|
||||
class MutagenParser(abstract.AbstractParser):
|
||||
def get_meta(self):
|
||||
f = mutagen.File(self.filename)
|
||||
@ -18,6 +19,7 @@ class MutagenParser(abstract.AbstractParser):
|
||||
f.save()
|
||||
return True
|
||||
|
||||
|
||||
class MP3Parser(MutagenParser):
|
||||
mimetypes = {'audio/mpeg', }
|
||||
|
||||
@ -28,8 +30,10 @@ class MP3Parser(MutagenParser):
|
||||
metadata[key.rstrip(' \t\r\n\0')] = ', '.join(map(str, meta[key].text))
|
||||
return metadata
|
||||
|
||||
|
||||
class OGGParser(MutagenParser):
|
||||
mimetypes = {'audio/ogg', }
|
||||
|
||||
|
||||
class FLACParser(MutagenParser):
|
||||
mimetypes = {'audio/flac', }
|
||||
|
@ -1,5 +1,6 @@
|
||||
from . import abstract
|
||||
|
||||
|
||||
class HarmlessParser(abstract.AbstractParser):
|
||||
""" This is the parser for filetypes that do not contain metadata. """
|
||||
mimetypes = {'application/xml', 'text/plain', 'application/rdf+xml'}
|
||||
|
@ -10,6 +10,7 @@ from gi.repository import GdkPixbuf
|
||||
|
||||
from . import abstract
|
||||
|
||||
|
||||
class PNGParser(abstract.AbstractParser):
|
||||
mimetypes = {'image/png', }
|
||||
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
|
||||
@ -31,6 +32,7 @@ class PNGParser(abstract.AbstractParser):
|
||||
surface.write_to_png(self.output_filename)
|
||||
return True
|
||||
|
||||
|
||||
class GdkPixbufAbstractParser(abstract.AbstractParser):
|
||||
""" GdkPixbuf can handle a lot of surfaces, so we're rendering images on it,
|
||||
this has the side-effect of removing metadata completely.
|
||||
|
@ -7,6 +7,7 @@ import zipfile
|
||||
|
||||
from . import abstract, parser_factory
|
||||
|
||||
|
||||
class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
||||
def _clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo:
|
||||
zipinfo.compress_type = zipfile.ZIP_DEFLATED
|
||||
@ -46,6 +47,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
||||
with open(tmp_parser.output_filename, 'rb') as f:
|
||||
zout.writestr(clean_zinfo, f.read())
|
||||
|
||||
|
||||
class MSOfficeParser(ArchiveBasedAbstractParser):
|
||||
mimetypes = {
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
|
@ -16,6 +16,7 @@ for module_loader, name, ispkg in pkgutil.walk_packages('.src'):
|
||||
continue
|
||||
importlib.import_module(name)
|
||||
|
||||
|
||||
def _get_parsers() -> list:
|
||||
""" Get all our parsers!"""
|
||||
def __get_parsers(cls):
|
||||
@ -23,6 +24,7 @@ def _get_parsers() -> list:
|
||||
[g for s in cls.__subclasses__() for g in __get_parsers(s)]
|
||||
return __get_parsers(abstract.AbstractParser)
|
||||
|
||||
|
||||
def get_parser(filename: str) -> (T, str):
|
||||
mtype, _ = mimetypes.guess_type(filename)
|
||||
|
||||
|
@ -13,6 +13,7 @@ class TestHelp(unittest.TestCase):
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertIn(b'usage: main.py [-h] [-c] [-l] [-s] [files [files ...]]', stdout)
|
||||
|
||||
|
||||
class TestGetMeta(unittest.TestCase):
|
||||
def test_pdf(self):
|
||||
proc = subprocess.Popen(['./main.py', '--show', './tests/data/dirty.pdf'],
|
||||
|
@ -8,6 +8,7 @@ import tempfile
|
||||
|
||||
from src import pdf, images, audio, office, parser_factory
|
||||
|
||||
|
||||
class TestParserFactory(unittest.TestCase):
|
||||
def test_subsubcalss(self):
|
||||
""" Test that our module auto-detection is handling sub-sub-classes """
|
||||
@ -15,6 +16,7 @@ class TestParserFactory(unittest.TestCase):
|
||||
self.assertEqual(mimetype, 'audio/mpeg')
|
||||
self.assertEqual(parser.__class__, audio.MP3Parser)
|
||||
|
||||
|
||||
class TestGetMeta(unittest.TestCase):
|
||||
def test_pdf(self):
|
||||
p = pdf.PDFParser('./tests/data/dirty.pdf')
|
||||
@ -132,6 +134,7 @@ class TestDeepCleaning(unittest.TestCase):
|
||||
|
||||
os.remove('./tests/data/clean.odt')
|
||||
|
||||
|
||||
class TestCleaning(unittest.TestCase):
|
||||
def test_pdf(self):
|
||||
shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
|
||||
|
Loading…
x
Reference in New Issue
Block a user