1
0
mirror of synced 2024-11-25 18:54:22 +01:00

Add some blank lines to make the code more compliant

This commit is contained in:
jvoisin 2018-04-04 23:21:48 +02:00
parent 9fa76c4c20
commit 0239ab3b6a
9 changed files with 21 additions and 0 deletions

View File

@ -16,6 +16,7 @@ def __check_file(filename:str, mode:int = os.R_OK) -> bool:
return False return False
return True return True
def create_arg_parser(): def create_arg_parser():
parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2') parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
parser.add_argument('files', nargs='*') parser.add_argument('files', nargs='*')
@ -29,6 +30,7 @@ def create_arg_parser():
help='list all the harmful metadata of a file without removing them') help='list all the harmful metadata of a file without removing them')
return parser return parser
def show_meta(filename:str): def show_meta(filename:str):
if not __check_file(filename): if not __check_file(filename):
return return
@ -44,6 +46,7 @@ def show_meta(filename:str):
except UnicodeEncodeError: except UnicodeEncodeError:
print(" %s: harmful content" % k) print(" %s: harmful content" % k)
def clean_meta(filename:str): def clean_meta(filename:str):
if not __check_file(filename, os.R_OK|os.W_OK): if not __check_file(filename, os.R_OK|os.W_OK):
return return
@ -54,6 +57,7 @@ def clean_meta(filename:str):
return return
p.remove_all() p.remove_all()
def show_parsers(): def show_parsers():
print('[+] Supported formats:') print('[+] Supported formats:')
for parser in parser_factory._get_parsers(): for parser in parser_factory._get_parsers():
@ -61,6 +65,7 @@ def show_parsers():
extensions = ', '.join(mimetypes.guess_all_extensions(mtype)) extensions = ', '.join(mimetypes.guess_all_extensions(mtype))
print(' - %s (%s)' % (mtype, extensions)) print(' - %s (%s)' % (mtype, extensions))
def __get_files_recursively(files): def __get_files_recursively(files):
for f in files: for f in files:
if os.path.isfile(f): if os.path.isfile(f):

View File

@ -1,5 +1,6 @@
import abc import abc
class AbstractParser(abc.ABC): class AbstractParser(abc.ABC):
meta_list = set() meta_list = set()
mimetypes = set() mimetypes = set()

View File

@ -4,6 +4,7 @@ import mutagen
from . import abstract from . import abstract
class MutagenParser(abstract.AbstractParser): class MutagenParser(abstract.AbstractParser):
def get_meta(self): def get_meta(self):
f = mutagen.File(self.filename) f = mutagen.File(self.filename)
@ -18,6 +19,7 @@ class MutagenParser(abstract.AbstractParser):
f.save() f.save()
return True return True
class MP3Parser(MutagenParser): class MP3Parser(MutagenParser):
mimetypes = {'audio/mpeg', } mimetypes = {'audio/mpeg', }
@ -28,8 +30,10 @@ class MP3Parser(MutagenParser):
metadata[key.rstrip(' \t\r\n\0')] = ', '.join(map(str, meta[key].text)) metadata[key.rstrip(' \t\r\n\0')] = ', '.join(map(str, meta[key].text))
return metadata return metadata
class OGGParser(MutagenParser): class OGGParser(MutagenParser):
mimetypes = {'audio/ogg', } mimetypes = {'audio/ogg', }
class FLACParser(MutagenParser): class FLACParser(MutagenParser):
mimetypes = {'audio/flac', } mimetypes = {'audio/flac', }

View File

@ -1,5 +1,6 @@
from . import abstract from . import abstract
class HarmlessParser(abstract.AbstractParser): class HarmlessParser(abstract.AbstractParser):
""" This is the parser for filetypes that do not contain metadata. """ """ This is the parser for filetypes that do not contain metadata. """
mimetypes = {'application/xml', 'text/plain', 'application/rdf+xml'} mimetypes = {'application/xml', 'text/plain', 'application/rdf+xml'}

View File

@ -10,6 +10,7 @@ from gi.repository import GdkPixbuf
from . import abstract from . import abstract
class PNGParser(abstract.AbstractParser): class PNGParser(abstract.AbstractParser):
mimetypes = {'image/png', } mimetypes = {'image/png', }
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
@ -31,6 +32,7 @@ class PNGParser(abstract.AbstractParser):
surface.write_to_png(self.output_filename) surface.write_to_png(self.output_filename)
return True return True
class GdkPixbufAbstractParser(abstract.AbstractParser): class GdkPixbufAbstractParser(abstract.AbstractParser):
""" GdkPixbuf can handle a lot of surfaces, so we're rending images on it, """ GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
this has the side-effect of removing metadata completely. this has the side-effect of removing metadata completely.

View File

@ -7,6 +7,7 @@ import zipfile
from . import abstract, parser_factory from . import abstract, parser_factory
class ArchiveBasedAbstractParser(abstract.AbstractParser): class ArchiveBasedAbstractParser(abstract.AbstractParser):
def _clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo: def _clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo:
zipinfo.compress_type = zipfile.ZIP_DEFLATED zipinfo.compress_type = zipfile.ZIP_DEFLATED
@ -46,6 +47,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
with open(tmp_parser.output_filename, 'rb') as f: with open(tmp_parser.output_filename, 'rb') as f:
zout.writestr(clean_zinfo, f.read()) zout.writestr(clean_zinfo, f.read())
class MSOfficeParser(ArchiveBasedAbstractParser): class MSOfficeParser(ArchiveBasedAbstractParser):
mimetypes = { mimetypes = {
'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',

View File

@ -16,6 +16,7 @@ for module_loader, name, ispkg in pkgutil.walk_packages('.src'):
continue continue
importlib.import_module(name) importlib.import_module(name)
def _get_parsers() -> list: def _get_parsers() -> list:
""" Get all our parsers!""" """ Get all our parsers!"""
def __get_parsers(cls): def __get_parsers(cls):
@ -23,6 +24,7 @@ def _get_parsers() -> list:
[g for s in cls.__subclasses__() for g in __get_parsers(s)] [g for s in cls.__subclasses__() for g in __get_parsers(s)]
return __get_parsers(abstract.AbstractParser) return __get_parsers(abstract.AbstractParser)
def get_parser(filename: str) -> (T, str): def get_parser(filename: str) -> (T, str):
mtype, _ = mimetypes.guess_type(filename) mtype, _ = mimetypes.guess_type(filename)

View File

@ -13,6 +13,7 @@ class TestHelp(unittest.TestCase):
stdout, _ = proc.communicate() stdout, _ = proc.communicate()
self.assertIn(b'usage: main.py [-h] [-c] [-l] [-s] [files [files ...]]', stdout) self.assertIn(b'usage: main.py [-h] [-c] [-l] [-s] [files [files ...]]', stdout)
class TestGetMeta(unittest.TestCase): class TestGetMeta(unittest.TestCase):
def test_pdf(self): def test_pdf(self):
proc = subprocess.Popen(['./main.py', '--show', './tests/data/dirty.pdf'], proc = subprocess.Popen(['./main.py', '--show', './tests/data/dirty.pdf'],

View File

@ -8,6 +8,7 @@ import tempfile
from src import pdf, images, audio, office, parser_factory from src import pdf, images, audio, office, parser_factory
class TestParserFactory(unittest.TestCase): class TestParserFactory(unittest.TestCase):
def test_subsubcalss(self): def test_subsubcalss(self):
""" Test that our module auto-detection is handling sub-sub-classes """ """ Test that our module auto-detection is handling sub-sub-classes """
@ -15,6 +16,7 @@ class TestParserFactory(unittest.TestCase):
self.assertEqual(mimetype, 'audio/mpeg') self.assertEqual(mimetype, 'audio/mpeg')
self.assertEqual(parser.__class__, audio.MP3Parser) self.assertEqual(parser.__class__, audio.MP3Parser)
class TestGetMeta(unittest.TestCase): class TestGetMeta(unittest.TestCase):
def test_pdf(self): def test_pdf(self):
p = pdf.PDFParser('./tests/data/dirty.pdf') p = pdf.PDFParser('./tests/data/dirty.pdf')
@ -132,6 +134,7 @@ class TestDeepCleaning(unittest.TestCase):
os.remove('./tests/data/clean.odt') os.remove('./tests/data/clean.odt')
class TestCleaning(unittest.TestCase): class TestCleaning(unittest.TestCase):
def test_pdf(self): def test_pdf(self):
shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf') shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')