Add some white lines to make the code more compliant
This commit is contained in:
parent
9fa76c4c20
commit
0239ab3b6a
5
main.py
5
main.py
@ -16,6 +16,7 @@ def __check_file(filename:str, mode:int = os.R_OK) -> bool:
|
|||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def create_arg_parser():
|
def create_arg_parser():
|
||||||
parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
|
parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
|
||||||
parser.add_argument('files', nargs='*')
|
parser.add_argument('files', nargs='*')
|
||||||
@ -29,6 +30,7 @@ def create_arg_parser():
|
|||||||
help='list all the harmful metadata of a file without removing them')
|
help='list all the harmful metadata of a file without removing them')
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
def show_meta(filename:str):
|
def show_meta(filename:str):
|
||||||
if not __check_file(filename):
|
if not __check_file(filename):
|
||||||
return
|
return
|
||||||
@ -44,6 +46,7 @@ def show_meta(filename:str):
|
|||||||
except UnicodeEncodeError:
|
except UnicodeEncodeError:
|
||||||
print(" %s: harmful content" % k)
|
print(" %s: harmful content" % k)
|
||||||
|
|
||||||
|
|
||||||
def clean_meta(filename:str):
|
def clean_meta(filename:str):
|
||||||
if not __check_file(filename, os.R_OK|os.W_OK):
|
if not __check_file(filename, os.R_OK|os.W_OK):
|
||||||
return
|
return
|
||||||
@ -54,6 +57,7 @@ def clean_meta(filename:str):
|
|||||||
return
|
return
|
||||||
p.remove_all()
|
p.remove_all()
|
||||||
|
|
||||||
|
|
||||||
def show_parsers():
|
def show_parsers():
|
||||||
print('[+] Supported formats:')
|
print('[+] Supported formats:')
|
||||||
for parser in parser_factory._get_parsers():
|
for parser in parser_factory._get_parsers():
|
||||||
@ -61,6 +65,7 @@ def show_parsers():
|
|||||||
extensions = ', '.join(mimetypes.guess_all_extensions(mtype))
|
extensions = ', '.join(mimetypes.guess_all_extensions(mtype))
|
||||||
print(' - %s (%s)' % (mtype, extensions))
|
print(' - %s (%s)' % (mtype, extensions))
|
||||||
|
|
||||||
|
|
||||||
def __get_files_recursively(files):
|
def __get_files_recursively(files):
|
||||||
for f in files:
|
for f in files:
|
||||||
if os.path.isfile(f):
|
if os.path.isfile(f):
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import abc
|
import abc
|
||||||
|
|
||||||
|
|
||||||
class AbstractParser(abc.ABC):
|
class AbstractParser(abc.ABC):
|
||||||
meta_list = set()
|
meta_list = set()
|
||||||
mimetypes = set()
|
mimetypes = set()
|
||||||
|
@ -4,6 +4,7 @@ import mutagen
|
|||||||
|
|
||||||
from . import abstract
|
from . import abstract
|
||||||
|
|
||||||
|
|
||||||
class MutagenParser(abstract.AbstractParser):
|
class MutagenParser(abstract.AbstractParser):
|
||||||
def get_meta(self):
|
def get_meta(self):
|
||||||
f = mutagen.File(self.filename)
|
f = mutagen.File(self.filename)
|
||||||
@ -18,6 +19,7 @@ class MutagenParser(abstract.AbstractParser):
|
|||||||
f.save()
|
f.save()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
class MP3Parser(MutagenParser):
|
class MP3Parser(MutagenParser):
|
||||||
mimetypes = {'audio/mpeg', }
|
mimetypes = {'audio/mpeg', }
|
||||||
|
|
||||||
@ -28,8 +30,10 @@ class MP3Parser(MutagenParser):
|
|||||||
metadata[key.rstrip(' \t\r\n\0')] = ', '.join(map(str, meta[key].text))
|
metadata[key.rstrip(' \t\r\n\0')] = ', '.join(map(str, meta[key].text))
|
||||||
return metadata
|
return metadata
|
||||||
|
|
||||||
|
|
||||||
class OGGParser(MutagenParser):
|
class OGGParser(MutagenParser):
|
||||||
mimetypes = {'audio/ogg', }
|
mimetypes = {'audio/ogg', }
|
||||||
|
|
||||||
|
|
||||||
class FLACParser(MutagenParser):
|
class FLACParser(MutagenParser):
|
||||||
mimetypes = {'audio/flac', }
|
mimetypes = {'audio/flac', }
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from . import abstract
|
from . import abstract
|
||||||
|
|
||||||
|
|
||||||
class HarmlessParser(abstract.AbstractParser):
|
class HarmlessParser(abstract.AbstractParser):
|
||||||
""" This is the parser for filetypes that do not contain metadata. """
|
""" This is the parser for filetypes that do not contain metadata. """
|
||||||
mimetypes = {'application/xml', 'text/plain', 'application/rdf+xml'}
|
mimetypes = {'application/xml', 'text/plain', 'application/rdf+xml'}
|
||||||
|
@ -10,6 +10,7 @@ from gi.repository import GdkPixbuf
|
|||||||
|
|
||||||
from . import abstract
|
from . import abstract
|
||||||
|
|
||||||
|
|
||||||
class PNGParser(abstract.AbstractParser):
|
class PNGParser(abstract.AbstractParser):
|
||||||
mimetypes = {'image/png', }
|
mimetypes = {'image/png', }
|
||||||
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
|
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
|
||||||
@ -31,6 +32,7 @@ class PNGParser(abstract.AbstractParser):
|
|||||||
surface.write_to_png(self.output_filename)
|
surface.write_to_png(self.output_filename)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
class GdkPixbufAbstractParser(abstract.AbstractParser):
|
class GdkPixbufAbstractParser(abstract.AbstractParser):
|
||||||
""" GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
|
""" GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
|
||||||
this has the side-effect of removing metadata completely.
|
this has the side-effect of removing metadata completely.
|
||||||
|
@ -7,6 +7,7 @@ import zipfile
|
|||||||
|
|
||||||
from . import abstract, parser_factory
|
from . import abstract, parser_factory
|
||||||
|
|
||||||
|
|
||||||
class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
||||||
def _clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo:
|
def _clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo:
|
||||||
zipinfo.compress_type = zipfile.ZIP_DEFLATED
|
zipinfo.compress_type = zipfile.ZIP_DEFLATED
|
||||||
@ -46,6 +47,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
with open(tmp_parser.output_filename, 'rb') as f:
|
with open(tmp_parser.output_filename, 'rb') as f:
|
||||||
zout.writestr(clean_zinfo, f.read())
|
zout.writestr(clean_zinfo, f.read())
|
||||||
|
|
||||||
|
|
||||||
class MSOfficeParser(ArchiveBasedAbstractParser):
|
class MSOfficeParser(ArchiveBasedAbstractParser):
|
||||||
mimetypes = {
|
mimetypes = {
|
||||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||||
|
@ -16,6 +16,7 @@ for module_loader, name, ispkg in pkgutil.walk_packages('.src'):
|
|||||||
continue
|
continue
|
||||||
importlib.import_module(name)
|
importlib.import_module(name)
|
||||||
|
|
||||||
|
|
||||||
def _get_parsers() -> list:
|
def _get_parsers() -> list:
|
||||||
""" Get all our parsers!"""
|
""" Get all our parsers!"""
|
||||||
def __get_parsers(cls):
|
def __get_parsers(cls):
|
||||||
@ -23,6 +24,7 @@ def _get_parsers() -> list:
|
|||||||
[g for s in cls.__subclasses__() for g in __get_parsers(s)]
|
[g for s in cls.__subclasses__() for g in __get_parsers(s)]
|
||||||
return __get_parsers(abstract.AbstractParser)
|
return __get_parsers(abstract.AbstractParser)
|
||||||
|
|
||||||
|
|
||||||
def get_parser(filename: str) -> (T, str):
|
def get_parser(filename: str) -> (T, str):
|
||||||
mtype, _ = mimetypes.guess_type(filename)
|
mtype, _ = mimetypes.guess_type(filename)
|
||||||
|
|
||||||
|
@ -13,6 +13,7 @@ class TestHelp(unittest.TestCase):
|
|||||||
stdout, _ = proc.communicate()
|
stdout, _ = proc.communicate()
|
||||||
self.assertIn(b'usage: main.py [-h] [-c] [-l] [-s] [files [files ...]]', stdout)
|
self.assertIn(b'usage: main.py [-h] [-c] [-l] [-s] [files [files ...]]', stdout)
|
||||||
|
|
||||||
|
|
||||||
class TestGetMeta(unittest.TestCase):
|
class TestGetMeta(unittest.TestCase):
|
||||||
def test_pdf(self):
|
def test_pdf(self):
|
||||||
proc = subprocess.Popen(['./main.py', '--show', './tests/data/dirty.pdf'],
|
proc = subprocess.Popen(['./main.py', '--show', './tests/data/dirty.pdf'],
|
||||||
|
@ -8,6 +8,7 @@ import tempfile
|
|||||||
|
|
||||||
from src import pdf, images, audio, office, parser_factory
|
from src import pdf, images, audio, office, parser_factory
|
||||||
|
|
||||||
|
|
||||||
class TestParserFactory(unittest.TestCase):
|
class TestParserFactory(unittest.TestCase):
|
||||||
def test_subsubcalss(self):
|
def test_subsubcalss(self):
|
||||||
""" Test that our module auto-detection is handling sub-sub-classes """
|
""" Test that our module auto-detection is handling sub-sub-classes """
|
||||||
@ -15,6 +16,7 @@ class TestParserFactory(unittest.TestCase):
|
|||||||
self.assertEqual(mimetype, 'audio/mpeg')
|
self.assertEqual(mimetype, 'audio/mpeg')
|
||||||
self.assertEqual(parser.__class__, audio.MP3Parser)
|
self.assertEqual(parser.__class__, audio.MP3Parser)
|
||||||
|
|
||||||
|
|
||||||
class TestGetMeta(unittest.TestCase):
|
class TestGetMeta(unittest.TestCase):
|
||||||
def test_pdf(self):
|
def test_pdf(self):
|
||||||
p = pdf.PDFParser('./tests/data/dirty.pdf')
|
p = pdf.PDFParser('./tests/data/dirty.pdf')
|
||||||
@ -132,6 +134,7 @@ class TestDeepCleaning(unittest.TestCase):
|
|||||||
|
|
||||||
os.remove('./tests/data/clean.odt')
|
os.remove('./tests/data/clean.odt')
|
||||||
|
|
||||||
|
|
||||||
class TestCleaning(unittest.TestCase):
|
class TestCleaning(unittest.TestCase):
|
||||||
def test_pdf(self):
|
def test_pdf(self):
|
||||||
shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
|
shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
|
||||||
|
Loading…
Reference in New Issue
Block a user