From 2ba38dd2a18ab57ed7aac7ccdd6a42ff5e4d4eb7 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 12 Oct 2018 11:58:01 +0200 Subject: [PATCH] Bump mypy typing coverage --- libmat2/__init__.py | 5 +++-- libmat2/abstract.py | 4 ++-- libmat2/archive.py | 3 ++- libmat2/audio.py | 28 +++++++++++++++++----------- libmat2/harmless.py | 4 ++-- libmat2/images.py | 20 ++++++++++---------- libmat2/office.py | 6 +++--- libmat2/pdf.py | 3 ++- libmat2/torrent.py | 2 +- mat2 | 18 ++++++++++-------- 10 files changed, 52 insertions(+), 41 deletions(-) diff --git a/libmat2/__init__.py b/libmat2/__init__.py index fbb61bc..f55a14c 100644 --- a/libmat2/__init__.py +++ b/libmat2/__init__.py @@ -8,6 +8,7 @@ from typing import Dict, Optional # make pyflakes happy assert Dict +assert Optional # A set of extension that aren't supported, despite matching a supported mimetype UNSUPPORTED_EXTENSIONS = { @@ -36,7 +37,7 @@ DEPENDENCIES = { 'mutagen': 'Mutagen', } -def _get_exiftool_path() -> Optional[str]: # pragma: no cover +def _get_exiftool_path() -> str: # pragma: no cover exiftool_path = '/usr/bin/exiftool' if os.path.isfile(exiftool_path): if os.access(exiftool_path, os.X_OK): @@ -48,7 +49,7 @@ def _get_exiftool_path() -> Optional[str]: # pragma: no cover if os.access(exiftool_path, os.X_OK): return exiftool_path - return None + raise ValueError def check_dependencies() -> dict: ret = collections.defaultdict(bool) # type: Dict[str, bool] diff --git a/libmat2/abstract.py b/libmat2/abstract.py index 5bcaa69..0084796 100644 --- a/libmat2/abstract.py +++ b/libmat2/abstract.py @@ -1,6 +1,6 @@ import abc import os -from typing import Set, Dict +from typing import Set, Dict, Union assert Set # make pyflakes happy @@ -22,7 +22,7 @@ class AbstractParser(abc.ABC): self.lightweight_cleaning = False @abc.abstractmethod - def get_meta(self) -> Dict[str, str]: + def get_meta(self) -> Dict[str, Union[str, dict]]: pass # pragma: no cover @abc.abstractmethod diff --git a/libmat2/archive.py b/libmat2/archive.py index 016142d..f788ecc 100644 --- a/libmat2/archive.py +++ b/libmat2/archive.py @@ -4,13 +4,14 @@ import tempfile import os import logging import shutil -from typing import Dict, Set, Pattern +from typing import Dict, Set, Pattern, Union from . import abstract, UnknownMemberPolicy, parser_factory # Make pyflakes happy assert Set assert Pattern +assert Union class ArchiveBasedAbstractParser(abstract.AbstractParser): diff --git a/libmat2/audio.py b/libmat2/audio.py index b67f766..bfe7f79 100644 --- a/libmat2/audio.py +++ b/libmat2/audio.py @@ -2,6 +2,7 @@ import mimetypes import os import shutil import tempfile +from typing import Dict, Union import mutagen @@ -16,13 +17,13 @@ class MutagenParser(abstract.AbstractParser): except mutagen.MutagenError: raise ValueError - def get_meta(self): + def get_meta(self) -> Dict[str, Union[str, dict]]: f = mutagen.File(self.filename) if f.tags: return {k:', '.join(v) for k, v in f.tags.items()} return {} - def remove_all(self): + def remove_all(self) -> bool: shutil.copy(self.filename, self.output_filename) f = mutagen.File(self.output_filename) f.delete() @@ -33,8 +34,8 @@ class MutagenParser(abstract.AbstractParser): class MP3Parser(MutagenParser): mimetypes = {'audio/mpeg', } - def get_meta(self): - metadata = {} + def get_meta(self) -> Dict[str, Union[str, dict]]: + metadata = {} # type: Dict[str, Union[str, dict]] meta = mutagen.File(self.filename).tags for key in meta: metadata[key.rstrip(' \t\r\n\0')] = ', '.join(map(str, meta[key].text)) @@ -48,7 +49,7 @@ class OGGParser(MutagenParser): class FLACParser(MutagenParser): mimetypes = {'audio/flac', 'audio/x-flac'} - def remove_all(self): + def remove_all(self) -> bool: shutil.copy(self.filename, self.output_filename) f = mutagen.File(self.output_filename) f.clear_pictures() @@ -56,16 +57,21 @@ class FLACParser(MutagenParser): f.save(deleteid3=True) return True - def get_meta(self): + def get_meta(self) -> Dict[str, Union[str, dict]]: meta = super().get_meta() for num, picture in enumerate(mutagen.File(self.filename).pictures): name = picture.desc if picture.desc else 'Cover %d' % num + extension = mimetypes.guess_extension(picture.mime) + if extension is None: # pragma: no cover + meta[name] = 'harmful data' + continue + _, fname = tempfile.mkstemp() + fname = fname + extension with open(fname, 'wb') as f: f.write(picture.data) - extension = mimetypes.guess_extension(picture.mime) - shutil.move(fname, fname + extension) - p, _ = parser_factory.get_parser(fname+extension) - meta[name] = p.get_meta() if p else 'harmful data' - os.remove(fname + extension) + p, _ = parser_factory.get_parser(fname) # type: ignore + # Mypy chokes on ternaries :/ + meta[name] = p.get_meta() if p else 'harmful data' # type: ignore + os.remove(fname) return meta diff --git a/libmat2/harmless.py b/libmat2/harmless.py index f646099..fad0ef8 100644 --- a/libmat2/harmless.py +++ b/libmat2/harmless.py @@ -1,5 +1,5 @@ import shutil -from typing import Dict +from typing import Dict, Union from . import abstract @@ -7,7 +7,7 @@ class HarmlessParser(abstract.AbstractParser): """ This is the parser for filetypes that can not contain metadata. """ mimetypes = {'text/plain', 'image/x-ms-bmp'} - def get_meta(self) -> Dict[str, str]: + def get_meta(self) -> Dict[str, Union[str, dict]]: return dict() def remove_all(self) -> bool: diff --git a/libmat2/images.py b/libmat2/images.py index 8f7a98d..a29cbb7 100644 --- a/libmat2/images.py +++ b/libmat2/images.py @@ -5,7 +5,7 @@ import os import shutil import tempfile import re -from typing import Set +from typing import Set, Dict, Union import cairo @@ -25,7 +25,7 @@ class _ImageParser(abstract.AbstractParser): meta_whitelist = set() # type: Set[str] @staticmethod - def __handle_problematic_filename(filename: str, callback) -> str: + def __handle_problematic_filename(filename: str, callback) -> bytes: """ This method takes a filename with a problematic name, and safely applies it a `callback`.""" tmpdirname = tempfile.mkdtemp() @@ -35,7 +35,7 @@ class _ImageParser(abstract.AbstractParser): shutil.rmtree(tmpdirname) return out - def get_meta(self): + def get_meta(self) -> Dict[str, Union[str, dict]]: """ There is no way to escape the leading(s) dash(es) of the current self.filename to prevent parameter injections, so we need to take care of this. @@ -71,7 +71,7 @@ class PNGParser(_ImageParser): except MemoryError: # pragma: no cover raise ValueError - def remove_all(self): + def remove_all(self) -> bool: surface = cairo.ImageSurface.create_from_png(self.filename) surface.write_to_png(self.output_filename) return True @@ -83,7 +83,12 @@ class GdkPixbufAbstractParser(_ImageParser): """ _type = '' - def remove_all(self): + def __init__(self, filename): + super().__init__(filename) + if imghdr.what(filename) != self._type: # better safe than sorry + raise ValueError + + def remove_all(self) -> bool: _, extension = os.path.splitext(self.filename) pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.filename) if extension.lower() == '.jpg': @@ -91,11 +96,6 @@ class GdkPixbufAbstractParser(_ImageParser): pixbuf.savev(self.output_filename, extension[1:], [], []) return True - def __init__(self, filename): - super().__init__(filename) - if imghdr.what(filename) != self._type: # better safe than sorry - raise ValueError - class JPGParser(GdkPixbufAbstractParser): _type = 'jpeg' diff --git a/libmat2/office.py b/libmat2/office.py index 32e7b75..c10664f 100644 --- a/libmat2/office.py +++ b/libmat2/office.py @@ -2,7 +2,7 @@ import logging import os import re import zipfile -from typing import Dict, Set, Pattern, Tuple +from typing import Dict, Set, Pattern, Tuple, Union import xml.etree.ElementTree as ET # type: ignore @@ -296,7 +296,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser): return True - def get_meta(self) -> Dict[str, str]: + def get_meta(self) -> Dict[str, Union[str, dict]]: """ Yes, I know that parsing xml with regexp ain't pretty, be my guest and fix it if you want. @@ -381,7 +381,7 @@ class LibreOfficeParser(ArchiveBasedAbstractParser): return False return True - def get_meta(self) -> Dict[str, str]: + def get_meta(self) -> Dict[str, Union[str, dict]]: """ Yes, I know that parsing xml with regexp ain't pretty, be my guest and fix it if you want. diff --git a/libmat2/pdf.py b/libmat2/pdf.py index 140b4f4..17cd61e 100644 --- a/libmat2/pdf.py +++ b/libmat2/pdf.py @@ -7,6 +7,7 @@ import re import logging import tempfile import io +from typing import Dict, Union from distutils.version import LooseVersion import cairo @@ -130,7 +131,7 @@ class PDFParser(abstract.AbstractParser): metadata[key] = value return metadata - def get_meta(self): + def get_meta(self) -> Dict[str, Union[str, dict]]: """ Return a dict with all the meta of the file """ metadata = {} diff --git a/libmat2/torrent.py b/libmat2/torrent.py index c56e971..4d6c1e0 100644 --- a/libmat2/torrent.py +++ b/libmat2/torrent.py @@ -14,7 +14,7 @@ class TorrentParser(abstract.AbstractParser): if self.dict_repr is None: raise ValueError - def get_meta(self) -> Dict[str, str]: + def get_meta(self) -> Dict[str, Union[str, dict]]: metadata = {} for key, value in self.dict_repr.items(): if key not in self.whitelist: diff --git a/mat2 b/mat2 index ba1f0ac..a7a3e73 100755 --- a/mat2 +++ b/mat2 @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import os -from typing import Tuple, Generator, List +from typing import Tuple, Generator, List, Union import sys import mimetypes import argparse @@ -18,6 +18,7 @@ __version__ = '0.4.0' # Make pyflakes happy assert Tuple +assert Union def __check_file(filename: str, mode: int=os.R_OK) -> bool: @@ -98,12 +99,12 @@ def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy) return p.remove_all() -def show_parsers(): +def show_parsers() -> bool: print('[+] Supported formats:') - formats = set() - for parser in parser_factory._get_parsers(): + formats = set() # Set[str] + for parser in parser_factory._get_parsers(): # type: ignore for mtype in parser.mimetypes: - extensions = set() + extensions = set() # Set[str] for extension in mimetypes.guess_all_extensions(mtype): if extension not in UNSUPPORTED_EXTENSIONS: extensions.add(extension) @@ -113,6 +114,7 @@ def show_parsers(): continue formats.add(' - %s (%s)' % (mtype, ', '.join(extensions))) print('\n'.join(sorted(formats))) + return True def __get_files_recursively(files: List[str]) -> Generator[str, None, None]: @@ -126,7 +128,7 @@ def __get_files_recursively(files: List[str]) -> Generator[str, None, None]: elif __check_file(f): yield f -def main(): +def main() -> int: arg_parser = create_arg_parser() args = arg_parser.parse_args() @@ -135,13 +137,13 @@ def main(): if not args.files: if args.list: - show_parsers() + return show_parsers() elif args.check_dependencies: print("Dependencies required for MAT2 %s:" % __version__) for key, value in sorted(check_dependencies().items()): print('- %s: %s' % (key, 'yes' if value else 'no')) else: - return arg_parser.print_help() + arg_parser.print_help() return 0 elif args.show: