1
0
Fork 0

Do a pylint pass

This commit is contained in:
jvoisin 2018-05-16 22:36:59 +02:00
parent 0354c3b7e3
commit fa7d18784c
9 changed files with 89 additions and 79 deletions

View File

@ -48,6 +48,7 @@ def show_meta(filename:str):
if p is None:
print("[-] %s's format (%s) is not supported" % (filename, mtype))
return
print("[+] Metadata for %s:" % filename)
for k, v in p.get_meta().items():
try: # FIXME this is ugly.
@ -55,11 +56,10 @@ def show_meta(filename:str):
except UnicodeEncodeError:
print(" %s: harmful content" % k)
def clean_meta(params: Tuple[str, bool]) -> bool:
filename, is_lightweigth = params
if not __check_file(filename, os.R_OK|os.W_OK):
return
return False
p, mtype = parser_factory.get_parser(filename)
if p is None:
@ -102,12 +102,12 @@ def main():
if not args.list:
return arg_parser.print_help()
show_parsers()
return
return 0
elif args.show:
for f in __get_files_recursively(args.files):
show_meta(f)
return
return 0
else:
p = multiprocessing.Pool()

View File

@ -2,4 +2,5 @@
# A set of extensions that aren't supported, despite matching a supported mimetype
unsupported_extensions = set(['bat', 'c', 'h', 'ksh', 'pl', 'txt', 'asc',
'text', 'pot', 'brf', 'srt', 'rdf', 'wsdl', 'xpdl', 'xsl', 'xsd'])
'text', 'pot', 'brf', 'srt', 'rdf', 'wsdl',
'xpdl', 'xsl', 'xsd'])

View File

@ -6,6 +6,7 @@ class HarmlessParser(abstract.AbstractParser):
mimetypes = {'application/xml', 'text/plain'}
def __init__(self, filename: str):
super().__init__(filename)
self.filename = filename
self.output_filename = filename

View File

@ -14,11 +14,12 @@ from . import abstract
class PNGParser(abstract.AbstractParser):
mimetypes = {'image/png', }
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate',
"FileInodeChangeDate", 'FilePermissions', 'FileType',
'FileTypeExtension', 'MIMEType', 'ImageWidth', 'BitDepth', 'ColorType',
'Compression', 'Filter', 'Interlace', 'BackgroundColor', 'ImageSize',
'Megapixels', 'ImageHeight'}
'Directory', 'FileSize', 'FileModifyDate',
'FileAccessDate', 'FileInodeChangeDate',
'FilePermissions', 'FileType', 'FileTypeExtension',
'MIMEType', 'ImageWidth', 'BitDepth', 'ColorType',
'Compression', 'Filter', 'Interlace', 'BackgroundColor',
'ImageSize', 'Megapixels', 'ImageHeight'}
def __init__(self, filename):
super().__init__(filename)
@ -63,24 +64,26 @@ class GdkPixbufAbstractParser(abstract.AbstractParser):
class JPGParser(GdkPixbufAbstractParser):
mimetypes = {'image/jpeg'}
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate',
"FileInodeChangeDate", 'FilePermissions', 'FileType',
'FileTypeExtension', 'MIMEType', 'ImageWidth',
'ImageSize', 'BitsPerSample', 'ColorComponents', 'EncodingProcess',
'JFIFVersion', 'ResolutionUnit', 'XResolution', 'YCbCrSubSampling',
'Directory', 'FileSize', 'FileModifyDate',
'FileAccessDate', "FileInodeChangeDate",
'FilePermissions', 'FileType', 'FileTypeExtension',
'MIMEType', 'ImageWidth', 'ImageSize', 'BitsPerSample',
'ColorComponents', 'EncodingProcess', 'JFIFVersion',
'ResolutionUnit', 'XResolution', 'YCbCrSubSampling',
'YResolution', 'Megapixels', 'ImageHeight'}
class TiffParser(GdkPixbufAbstractParser):
mimetypes = {'image/tiff'}
meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
'FillOrder', 'PhotometricInterpretation', 'PlanarConfiguration',
'RowsPerStrip', 'SamplesPerPixel', 'StripByteCounts',
'StripOffsets', 'BitsPerSample', 'Directory', 'ExifToolVersion',
'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate',
'FileName', 'FilePermissions', 'FileSize', 'FileType',
'FileTypeExtension', 'ImageHeight', 'ImageSize', 'ImageWidth',
'MIMEType', 'Megapixels', 'SourceFile'}
'FillOrder', 'PhotometricInterpretation',
'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel',
'StripByteCounts', 'StripOffsets', 'BitsPerSample',
'Directory', 'ExifToolVersion', 'FileAccessDate',
'FileInodeChangeDate', 'FileModifyDate', 'FileName',
'FilePermissions', 'FileSize', 'FileType',
'FileTypeExtension', 'ImageHeight', 'ImageSize',
'ImageWidth', 'MIMEType', 'Megapixels', 'SourceFile'}
class BMPParser(GdkPixbufAbstractParser):
@ -88,11 +91,11 @@ class BMPParser(GdkPixbufAbstractParser):
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
'FileSize', 'FileModifyDate', 'FileAccessDate',
'FileInodeChangeDate', 'FilePermissions', 'FileType',
'FileTypeExtension', 'MIMEType', 'BMPVersion', 'ImageWidth',
'ImageHeight', 'Planes', 'BitDepth', 'Compression', 'ImageLength',
'PixelsPerMeterX', 'PixelsPerMeterY', 'NumColors',
'NumImportantColors', 'RedMask', 'GreenMask', 'BlueMask',
'AlphaMask', 'ColorSpace', 'RedEndpoint', 'GreenEndpoint',
'BlueEndpoint', 'GammaRed', 'GammaGreen', 'GammaBlue', 'ImageSize',
'Megapixels'}
'FileTypeExtension', 'MIMEType', 'BMPVersion',
'ImageWidth', 'ImageHeight', 'Planes', 'BitDepth',
'Compression', 'ImageLength', 'PixelsPerMeterX',
'PixelsPerMeterY', 'NumColors', 'NumImportantColors',
'RedMask', 'GreenMask', 'BlueMask', 'AlphaMask',
'ColorSpace', 'RedEndpoint', 'GreenEndpoint',
'BlueEndpoint', 'GammaRed', 'GammaGreen', 'GammaBlue',
'ImageSize', 'Megapixels'}

View File

@ -35,7 +35,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
return metadata
def _clean_internal_file(self, item:zipfile.ZipInfo, temp_folder:str, zin:zipfile.ZipFile, zout:zipfile.ZipFile):
def _clean_internal_file(self, item: zipfile.ZipInfo, temp_folder: str,
zin: zipfile.ZipFile, zout: zipfile.ZipFile):
zin.extract(member=item, path=temp_folder)
tmp_parser, mtype = parser_factory.get_parser(os.path.join(temp_folder, item.filename))
if not tmp_parser:

View File

@ -2,10 +2,10 @@ import os
import mimetypes
import importlib
import pkgutil
from typing import TypeVar
from . import abstract, unsupported_extensions
from typing import TypeVar
T = TypeVar('T', bound='abstract.AbstractParser')

View File

@ -103,7 +103,8 @@ class PDFParser(abstract.AbstractParser):
return True
def __remove_superficial_meta(self, in_file:str, out_file: str) -> bool:
@staticmethod
def __remove_superficial_meta(in_file: str, out_file: str) -> bool:
document = Poppler.Document.new_from_file('file://' + in_file)
document.set_producer('')
document.set_creator('')
@ -112,7 +113,8 @@ class PDFParser(abstract.AbstractParser):
return True
def __parse_metadata_field(self, data:str) -> dict:
@staticmethod
def __parse_metadata_field(data: str) -> dict:
metadata = {}
for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I):
metadata[key] = value

View File

@ -53,7 +53,8 @@ class _BencodeHandler(object):
list: self.__encode_list,
}
def __decode_int(self, s:str) -> (int, str):
@staticmethod
def __decode_int(s: str) -> (int, str):
s = s[1:]
next_idx = s.index(b'e')
if s.startswith(b'-0'):
@ -62,7 +63,8 @@ class _BencodeHandler(object):
raise ValueError # no leading zero except for zero itself
return int(s[:next_idx]), s[next_idx+1:]
def __decode_string(self, s:str) -> (str, str):
@staticmethod
def __decode_string(s: str) -> (str, str):
sep = s.index(b':')
str_len = int(s[:sep])
if str_len < 0: