diff --git a/libmat2/audio.py b/libmat2/audio.py
index a26f36f..99a335d 100644
--- a/libmat2/audio.py
+++ b/libmat2/audio.py
@@ -10,11 +10,7 @@ class MutagenParser(abstract.AbstractParser):
         super().__init__(filename)
         try:
             mutagen.File(self.filename)
-        except mutagen.flac.MutagenError:
-            raise ValueError
-        except mutagen.mp3.MutagenError:
-            raise ValueError
-        except mutagen.ogg.MutagenError:
+        except mutagen.MutagenError:
             raise ValueError
 
     def get_meta(self):
@@ -47,4 +43,4 @@ class OGGParser(MutagenParser):
 
 
 class FLACParser(MutagenParser):
-    mimetypes = {'audio/flac', 'audio/x-flac' }
+    mimetypes = {'audio/flac', 'audio/x-flac'}
diff --git a/libmat2/images.py b/libmat2/images.py
index a7a9cad..74533b5 100644
--- a/libmat2/images.py
+++ b/libmat2/images.py
@@ -15,9 +15,9 @@ from gi.repository import GdkPixbuf
 
 from . import abstract
 
-class __ImageParser(abstract.AbstractParser):
+class _ImageParser(abstract.AbstractParser):
     @staticmethod
-    def __handle_problematic_filename(filename:str, callback) -> str:
+    def __handle_problematic_filename(filename: str, callback) -> str:
         """ This method takes a filename with a problematic name,
         and safely applies it a `callback`."""
         tmpdirname = tempfile.mkdtemp()
@@ -42,7 +42,7 @@ class __ImageParser(abstract.AbstractParser):
             meta.pop(key, None)
         return meta
 
-class PNGParser(__ImageParser):
+class PNGParser(_ImageParser):
     mimetypes = {'image/png', }
     meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
                       'Directory', 'FileSize', 'FileModifyDate',
@@ -65,7 +65,7 @@ class PNGParser(__ImageParser):
         return True
 
 
-class GdkPixbufAbstractParser(__ImageParser):
+class GdkPixbufAbstractParser(_ImageParser):
     """ GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
         this has the side-effect of removing metadata completely.
     """
diff --git a/libmat2/office.py b/libmat2/office.py
index acd8ca2..eae84f7 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -26,7 +26,7 @@ def _parse_xml(full_path: str):
     ns = parse_map(full_path)
 
     # Register the namespaces
-    for k,v in ns.items():
+    for k, v in ns.items():
         ET.register_namespace(k, v)
 
     return ET.parse(full_path), ns
@@ -35,11 +35,11 @@ def _parse_xml(full_path: str):
 class ArchiveBasedAbstractParser(abstract.AbstractParser):
     # Those are the files that have a format that _isn't_
     # supported by MAT2, but that we want to keep anyway.
-    files_to_keep = set() # type: Set[str]
+    files_to_keep = set()  # type: Set[str]
 
     # Those are the files that we _do not_ want to keep,
     # no matter if they are supported or not.
-    files_to_omit = set() # type: Set[Pattern]
+    files_to_omit = set()  # type: Set[Pattern]
 
     def __init__(self, filename):
         super().__init__(filename)
@@ -48,7 +48,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
         except zipfile.BadZipFile:
             raise ValueError
 
-    def _specific_cleanup(self, full_path:str) -> bool:
+    def _specific_cleanup(self, full_path: str) -> bool:
         """ This method can be used to apply specific treatment
         to files present in the archive."""
         return True
@@ -128,19 +128,19 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
         'application/vnd.openxmlformats-officedocument.presentationml.presentation'
     }
     files_to_keep = {
-            '[Content_Types].xml',
-            '_rels/.rels',
-            'word/_rels/document.xml.rels',
-            'word/document.xml',
-            'word/fontTable.xml',
-            'word/settings.xml',
-            'word/styles.xml',
+        '[Content_Types].xml',
+        '_rels/.rels',
+        'word/_rels/document.xml.rels',
+        'word/document.xml',
+        'word/fontTable.xml',
+        'word/settings.xml',
+        'word/styles.xml',
     }
     files_to_omit = set(map(re.compile, {  # type: ignore
-            '^docProps/',
+        '^docProps/',
     }))
 
-    def __remove_revisions(self, full_path:str) -> bool:
+    def __remove_revisions(self, full_path: str) -> bool:
         """ In this function, we're changing the XML document in two times,
         since we don't want to change the tree we're iterating on."""
 
@@ -152,7 +152,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
         elif tree.find('.//w:ins', ns) is None:
             return True
 
-        parent_map = {c:p for p in tree.iter( ) for c in p}
+        parent_map = {c:p for p in tree.iter() for c in p}
 
         elements = list([element for element in tree.iterfind('.//w:del', ns)])
         for element in elements:
@@ -174,7 +174,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
 
         return True
 
-    def _specific_cleanup(self, full_path:str) -> bool:
+    def _specific_cleanup(self, full_path: str) -> bool:
         if full_path.endswith('/word/document.xml'):
             return self.__remove_revisions(full_path)
         return True
@@ -214,21 +214,21 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
         'application/vnd.oasis.opendocument.image',
     }
     files_to_keep = {
-            'META-INF/manifest.xml',
-            'content.xml',
-            'manifest.rdf',
-            'mimetype',
-            'settings.xml',
-            'styles.xml',
+        'META-INF/manifest.xml',
+        'content.xml',
+        'manifest.rdf',
+        'mimetype',
+        'settings.xml',
+        'styles.xml',
     }
     files_to_omit = set(map(re.compile, {  # type: ignore
-            '^meta\.xml$',
-            '^Configurations2/',
-            '^Thumbnails/',
+        r'^meta\.xml$',
+        '^Configurations2/',
+        '^Thumbnails/',
     }))
 
 
-    def __remove_revisions(self, full_path:str) -> bool:
+    def __remove_revisions(self, full_path: str) -> bool:
         tree, ns = _parse_xml(full_path)
 
         if 'office' not in ns.keys():  # no revisions in the current file
@@ -242,7 +242,7 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
 
         return True
 
-    def _specific_cleanup(self, full_path:str) -> bool:
+    def _specific_cleanup(self, full_path: str) -> bool:
         if os.path.basename(full_path) == 'content.xml':
             return self.__remove_revisions(full_path)
         return True
diff --git a/libmat2/parser_factory.py b/libmat2/parser_factory.py
index 9f4740b..7d4f43f 100644
--- a/libmat2/parser_factory.py
+++ b/libmat2/parser_factory.py
@@ -37,10 +37,10 @@ def get_parser(filename: str) -> Tuple[Optional[T], Optional[str]]:
     if extension in unsupported_extensions:
         return None, mtype
 
-    for c in _get_parsers():  # type: ignore
-        if mtype in c.mimetypes:
+    for parser_class in _get_parsers():  # type: ignore
+        if mtype in parser_class.mimetypes:
             try:
-                return c(filename), mtype
+                return parser_class(filename), mtype
             except ValueError:
                 return None, mtype
     return None, mtype
diff --git a/libmat2/pdf.py b/libmat2/pdf.py
index 300fd4a..fa7f764 100644
--- a/libmat2/pdf.py
+++ b/libmat2/pdf.py
@@ -83,7 +83,9 @@ class PDFParser(abstract.AbstractParser):
             page_width, page_height = page.get_size()
             logging.info("Rendering page %d/%d", pagenum + 1, pages_count)
 
-            img_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, int(page_width) * self.__scale, int(page_height) * self.__scale)
+            width = int(page_width) * self.__scale
+            height = int(page_height) * self.__scale
+            img_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, width, height)
 
             img_context = cairo.Context(img_surface)
             img_context.scale(self.__scale, self.__scale)
diff --git a/libmat2/torrent.py b/libmat2/torrent.py
index b598065..ad49f47 100644
--- a/libmat2/torrent.py
+++ b/libmat2/torrent.py
@@ -125,7 +125,7 @@ class _BencodeHandler(object):
         try:
             r, l = self.__decode_func[s[0]](s)
         except (IndexError, KeyError, ValueError) as e:
-            logging.debug("Not a valid bencoded string: %s" % e)
+            logging.debug("Not a valid bencoded string: %s", e)
             return None
         if l != b'':
             logging.debug("Invalid bencoded value (data after valid prefix)")
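
Note on the audio.py hunk: the three module-specific except clauses can be collapsed because the FLAC, MP3 and OGG errors all derive from the shared base class mutagen.MutagenError, so one clause covers every parse failure mutagen can raise. A minimal sketch of the pattern (the probe helper name is illustrative, not part of mat2):

    import mutagen

    def probe(path: str):
        """Return a mutagen file object, or raise ValueError if parsing fails."""
        try:
            return mutagen.File(path)
        except mutagen.MutagenError:
            # mutagen.flac / mutagen.mp3 / mutagen.ogg errors all inherit from
            # mutagen.MutagenError, so this single clause replaces the three
            # module-specific except blocks removed above.
            raise ValueError(path)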