1
0
Mirror of the upstream repository, synced 2024-11-22 09:14:23 +01:00

Add some blank lines to make the code more PEP 8 compliant

This commit is contained in:
jvoisin 2018-04-04 23:21:48 +02:00
parent 9fa76c4c20
commit 0239ab3b6a
9 changed files with 21 additions and 0 deletions

View File

@ -16,6 +16,7 @@ def __check_file(filename:str, mode:int = os.R_OK) -> bool:
return False
return True
def create_arg_parser():
parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
parser.add_argument('files', nargs='*')
@ -29,6 +30,7 @@ def create_arg_parser():
help='list all the harmful metadata of a file without removing them')
return parser
def show_meta(filename:str):
if not __check_file(filename):
return
@ -44,6 +46,7 @@ def show_meta(filename:str):
except UnicodeEncodeError:
print(" %s: harmful content" % k)
def clean_meta(filename:str):
if not __check_file(filename, os.R_OK|os.W_OK):
return
@ -54,6 +57,7 @@ def clean_meta(filename:str):
return
p.remove_all()
def show_parsers():
print('[+] Supported formats:')
for parser in parser_factory._get_parsers():
@ -61,6 +65,7 @@ def show_parsers():
extensions = ', '.join(mimetypes.guess_all_extensions(mtype))
print(' - %s (%s)' % (mtype, extensions))
def __get_files_recursively(files):
for f in files:
if os.path.isfile(f):

View File

@ -1,5 +1,6 @@
import abc
class AbstractParser(abc.ABC):
meta_list = set()
mimetypes = set()

View File

@ -4,6 +4,7 @@ import mutagen
from . import abstract
class MutagenParser(abstract.AbstractParser):
def get_meta(self):
f = mutagen.File(self.filename)
@ -18,6 +19,7 @@ class MutagenParser(abstract.AbstractParser):
f.save()
return True
class MP3Parser(MutagenParser):
mimetypes = {'audio/mpeg', }
@ -28,8 +30,10 @@ class MP3Parser(MutagenParser):
metadata[key.rstrip(' \t\r\n\0')] = ', '.join(map(str, meta[key].text))
return metadata
class OGGParser(MutagenParser):
mimetypes = {'audio/ogg', }
class FLACParser(MutagenParser):
mimetypes = {'audio/flac', }

View File

@ -1,5 +1,6 @@
from . import abstract
class HarmlessParser(abstract.AbstractParser):
""" This is the parser for filetypes that do not contain metadata. """
mimetypes = {'application/xml', 'text/plain', 'application/rdf+xml'}

View File

@ -10,6 +10,7 @@ from gi.repository import GdkPixbuf
from . import abstract
class PNGParser(abstract.AbstractParser):
mimetypes = {'image/png', }
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
@ -31,6 +32,7 @@ class PNGParser(abstract.AbstractParser):
surface.write_to_png(self.output_filename)
return True
class GdkPixbufAbstractParser(abstract.AbstractParser):
""" GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
this has the side-effect of removing metadata completely.

View File

@ -7,6 +7,7 @@ import zipfile
from . import abstract, parser_factory
class ArchiveBasedAbstractParser(abstract.AbstractParser):
def _clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo:
zipinfo.compress_type = zipfile.ZIP_DEFLATED
@ -46,6 +47,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
with open(tmp_parser.output_filename, 'rb') as f:
zout.writestr(clean_zinfo, f.read())
class MSOfficeParser(ArchiveBasedAbstractParser):
mimetypes = {
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',

View File

@ -16,6 +16,7 @@ for module_loader, name, ispkg in pkgutil.walk_packages('.src'):
continue
importlib.import_module(name)
def _get_parsers() -> list:
""" Get all our parsers!"""
def __get_parsers(cls):
@ -23,6 +24,7 @@ def _get_parsers() -> list:
[g for s in cls.__subclasses__() for g in __get_parsers(s)]
return __get_parsers(abstract.AbstractParser)
def get_parser(filename: str) -> (T, str):
mtype, _ = mimetypes.guess_type(filename)

View File

@ -13,6 +13,7 @@ class TestHelp(unittest.TestCase):
stdout, _ = proc.communicate()
self.assertIn(b'usage: main.py [-h] [-c] [-l] [-s] [files [files ...]]', stdout)
class TestGetMeta(unittest.TestCase):
def test_pdf(self):
proc = subprocess.Popen(['./main.py', '--show', './tests/data/dirty.pdf'],

View File

@ -8,6 +8,7 @@ import tempfile
from src import pdf, images, audio, office, parser_factory
class TestParserFactory(unittest.TestCase):
def test_subsubcalss(self):
""" Test that our module auto-detection is handling sub-sub-classes """
@ -15,6 +16,7 @@ class TestParserFactory(unittest.TestCase):
self.assertEqual(mimetype, 'audio/mpeg')
self.assertEqual(parser.__class__, audio.MP3Parser)
class TestGetMeta(unittest.TestCase):
def test_pdf(self):
p = pdf.PDFParser('./tests/data/dirty.pdf')
@ -132,6 +134,7 @@ class TestDeepCleaning(unittest.TestCase):
os.remove('./tests/data/clean.odt')
class TestCleaning(unittest.TestCase):
def test_pdf(self):
shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')