1
0
Fork 0

Fix the type annotations

This commit is contained in:
jvoisin 2023-01-28 15:57:20 +00:00
parent 1f73a16ef3
commit 39fb254e01
14 changed files with 81 additions and 77 deletions

View File

@ -2,7 +2,7 @@
import enum import enum
import importlib import importlib
from typing import Optional, Union from typing import Optional, Union, Dict
from . import exiftool, video from . import exiftool, video
@ -66,8 +66,9 @@ CMD_DEPENDENCIES = {
}, },
} }
def check_dependencies() -> dict[str, dict[str, bool]]:
ret = dict() # type: dict[str, dict] def check_dependencies() -> Dict[str, Dict[str, bool]]:
ret = dict() # type: Dict[str, Dict]
for key, value in DEPENDENCIES.items(): for key, value in DEPENDENCIES.items():
ret[key] = { ret[key] = {

View File

@ -1,7 +1,7 @@
import abc import abc
import os import os
import re import re
from typing import Union from typing import Union, Set, Dict
class AbstractParser(abc.ABC): class AbstractParser(abc.ABC):
@ -9,8 +9,8 @@ class AbstractParser(abc.ABC):
It might yield `ValueError` on instantiation on invalid files, It might yield `ValueError` on instantiation on invalid files,
and `RuntimeError` when something went wrong in `remove_all`. and `RuntimeError` when something went wrong in `remove_all`.
""" """
meta_list = set() # type: set[str] meta_list = set() # type: Set[str]
mimetypes = set() # type: set[str] mimetypes = set() # type: Set[str]
def __init__(self, filename: str) -> None: def __init__(self, filename: str) -> None:
""" """
@ -33,7 +33,7 @@ class AbstractParser(abc.ABC):
self.sandbox = True self.sandbox = True
@abc.abstractmethod @abc.abstractmethod
def get_meta(self) -> dict[str, Union[str, dict]]: def get_meta(self) -> Dict[str, Union[str, Dict]]:
"""Return all the metadata of the current file""" """Return all the metadata of the current file"""
@abc.abstractmethod @abc.abstractmethod

View File

@ -7,7 +7,7 @@ import tempfile
import os import os
import logging import logging
import shutil import shutil
from typing import Pattern, Union, Any from typing import Pattern, Union, Any, Set, Dict, List
from . import abstract, UnknownMemberPolicy, parser_factory from . import abstract, UnknownMemberPolicy, parser_factory
@ -44,16 +44,16 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
def __init__(self, filename): def __init__(self, filename):
super().__init__(filename) super().__init__(filename)
# We ignore typing here because mypy is too stupid # We ignore typing here because mypy is too stupid
self.archive_class = None # type: ignore self.archive_class = None # type: ignore
self.member_class = None # type: ignore self.member_class = None # type: ignore
# Those are the files that have a format that _isn't_ # Those are the files that have a format that _isn't_
# supported by mat2, but that we want to keep anyway. # supported by mat2, but that we want to keep anyway.
self.files_to_keep = set() # type: set[Pattern] self.files_to_keep = set() # type: Set[Pattern]
# Those are the files that we _do not_ want to keep, # Those are the files that we _do not_ want to keep,
# no matter if they are supported or not. # no matter if they are supported or not.
self.files_to_omit = set() # type: set[Pattern] self.files_to_omit = set() # type: Set[Pattern]
# what should the parser do if it encounters an unknown file in # what should the parser do if it encounters an unknown file in
# the archive? # the archive?
@ -72,7 +72,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# pylint: disable=unused-argument # pylint: disable=unused-argument
return True # pragma: no cover return True # pragma: no cover
def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]: def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
""" This method can be used to extract specific metadata """ This method can be used to extract specific metadata
from files present in the archive.""" from files present in the archive."""
# pylint: disable=unused-argument # pylint: disable=unused-argument
@ -87,7 +87,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
@staticmethod @staticmethod
@abc.abstractmethod @abc.abstractmethod
def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]: def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
"""Return all the members of the archive.""" """Return all the members of the archive."""
@staticmethod @staticmethod
@ -97,7 +97,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
@staticmethod @staticmethod
@abc.abstractmethod @abc.abstractmethod
def _get_member_meta(member: ArchiveMember) -> dict[str, str]: def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
"""Return all the metadata of a given member.""" """Return all the metadata of a given member."""
@staticmethod @staticmethod
@ -128,8 +128,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# pylint: disable=unused-argument # pylint: disable=unused-argument
return member return member
def get_meta(self) -> dict[str, Union[str, dict]]: def get_meta(self) -> Dict[str, Union[str, Dict]]:
meta = dict() # type: dict[str, Union[str, dict]] meta = dict() # type: Dict[str, Union[str, Dict]]
with self.archive_class(self.filename) as zin: with self.archive_class(self.filename) as zin:
temp_folder = tempfile.mkdtemp() temp_folder = tempfile.mkdtemp()
@ -264,6 +264,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
class TarParser(ArchiveBasedAbstractParser): class TarParser(ArchiveBasedAbstractParser):
mimetypes = {'application/x-tar'} mimetypes = {'application/x-tar'}
def __init__(self, filename): def __init__(self, filename):
super().__init__(filename) super().__init__(filename)
# yes, it's tarfile.open and not tarfile.TarFile, # yes, it's tarfile.open and not tarfile.TarFile,
@ -336,7 +337,7 @@ class TarParser(ArchiveBasedAbstractParser):
return member return member
@staticmethod @staticmethod
def _get_member_meta(member: ArchiveMember) -> dict[str, str]: def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
assert isinstance(member, tarfile.TarInfo) # please mypy assert isinstance(member, tarfile.TarInfo) # please mypy
metadata = {} metadata = {}
if member.mtime != 0: if member.mtime != 0:
@ -358,7 +359,7 @@ class TarParser(ArchiveBasedAbstractParser):
archive.add(full_path, member.name, filter=TarParser._clean_member) # type: ignore archive.add(full_path, member.name, filter=TarParser._clean_member) # type: ignore
@staticmethod @staticmethod
def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]: def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
assert isinstance(archive, tarfile.TarFile) # please mypy assert isinstance(archive, tarfile.TarFile) # please mypy
return archive.getmembers() # type: ignore return archive.getmembers() # type: ignore
@ -391,7 +392,8 @@ class TarXzParser(TarParser):
class ZipParser(ArchiveBasedAbstractParser): class ZipParser(ArchiveBasedAbstractParser):
mimetypes = {'application/zip'} mimetypes = {'application/zip'}
def __init__(self, filename):
def __init__(self, filename: str):
super().__init__(filename) super().__init__(filename)
self.archive_class = zipfile.ZipFile self.archive_class = zipfile.ZipFile
self.member_class = zipfile.ZipInfo self.member_class = zipfile.ZipInfo
@ -412,7 +414,7 @@ class ZipParser(ArchiveBasedAbstractParser):
return member return member
@staticmethod @staticmethod
def _get_member_meta(member: ArchiveMember) -> dict[str, str]: def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
assert isinstance(member, zipfile.ZipInfo) # please mypy assert isinstance(member, zipfile.ZipInfo) # please mypy
metadata = {} metadata = {}
if member.create_system == 3: # this is Linux if member.create_system == 3: # this is Linux
@ -439,7 +441,7 @@ class ZipParser(ArchiveBasedAbstractParser):
compress_type=member.compress_type) compress_type=member.compress_type)
@staticmethod @staticmethod
def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]: def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
assert isinstance(archive, zipfile.ZipFile) # please mypy assert isinstance(archive, zipfile.ZipFile) # please mypy
return archive.infolist() # type: ignore return archive.infolist() # type: ignore

View File

@ -2,7 +2,7 @@ import mimetypes
import os import os
import shutil import shutil
import tempfile import tempfile
from typing import Union from typing import Union, Dict
import mutagen import mutagen
@ -18,10 +18,10 @@ class MutagenParser(abstract.AbstractParser):
except mutagen.MutagenError: except mutagen.MutagenError:
raise ValueError raise ValueError
def get_meta(self) -> dict[str, Union[str, dict]]: def get_meta(self) -> Dict[str, Union[str, Dict]]:
f = mutagen.File(self.filename) f = mutagen.File(self.filename)
if f.tags: if f.tags:
return {k:', '.join(map(str, v)) for k, v in f.tags.items()} return {k: ', '.join(map(str, v)) for k, v in f.tags.items()}
return {} return {}
def remove_all(self) -> bool: def remove_all(self) -> bool:
@ -38,8 +38,8 @@ class MutagenParser(abstract.AbstractParser):
class MP3Parser(MutagenParser): class MP3Parser(MutagenParser):
mimetypes = {'audio/mpeg', } mimetypes = {'audio/mpeg', }
def get_meta(self) -> dict[str, Union[str, dict]]: def get_meta(self) -> Dict[str, Union[str, Dict]]:
metadata = {} # type: dict[str, Union[str, dict]] metadata = {} # type: Dict[str, Union[str, Dict]]
meta = mutagen.File(self.filename).tags meta = mutagen.File(self.filename).tags
if not meta: if not meta:
return metadata return metadata
@ -68,12 +68,12 @@ class FLACParser(MutagenParser):
f.save(deleteid3=True) f.save(deleteid3=True)
return True return True
def get_meta(self) -> dict[str, Union[str, dict]]: def get_meta(self) -> Dict[str, Union[str, Dict]]:
meta = super().get_meta() meta = super().get_meta()
for num, picture in enumerate(mutagen.File(self.filename).pictures): for num, picture in enumerate(mutagen.File(self.filename).pictures):
name = picture.desc if picture.desc else 'Cover %d' % num name = picture.desc if picture.desc else 'Cover %d' % num
extension = mimetypes.guess_extension(picture.mime) extension = mimetypes.guess_extension(picture.mime)
if extension is None: # pragma: no cover if extension is None: # pragma: no cover
meta[name] = 'harmful data' meta[name] = 'harmful data'
continue continue
@ -98,6 +98,7 @@ class WAVParser(video.AbstractFFmpegParser):
'MIMEType', 'NumChannels', 'SampleRate', 'SourceFile', 'MIMEType', 'NumChannels', 'SampleRate', 'SourceFile',
} }
class AIFFParser(video.AbstractFFmpegParser): class AIFFParser(video.AbstractFFmpegParser):
mimetypes = {'audio/aiff', 'audio/x-aiff'} mimetypes = {'audio/aiff', 'audio/x-aiff'}
meta_allowlist = {'AvgBytesPerSec', 'BitsPerSample', 'Directory', meta_allowlist = {'AvgBytesPerSec', 'BitsPerSample', 'Directory',

View File

@ -12,7 +12,7 @@ import shutil
import subprocess import subprocess
import tempfile import tempfile
import functools import functools
from typing import Optional from typing import Optional, List
__all__ = ['PIPE', 'run', 'CalledProcessError'] __all__ = ['PIPE', 'run', 'CalledProcessError']
@ -33,7 +33,7 @@ def _get_bwrap_path() -> str:
def _get_bwrap_args(tempdir: str, def _get_bwrap_args(tempdir: str,
input_filename: str, input_filename: str,
output_filename: Optional[str] = None) -> list[str]: output_filename: Optional[str] = None) -> List[str]:
ro_bind_args = [] ro_bind_args = []
cwd = os.getcwd() cwd = os.getcwd()

View File

@ -3,10 +3,11 @@ import re
import uuid import uuid
import zipfile import zipfile
import xml.etree.ElementTree as ET # type: ignore import xml.etree.ElementTree as ET # type: ignore
from typing import Any from typing import Any, Dict
from . import archive, office from . import archive, office
class EPUBParser(archive.ZipParser): class EPUBParser(archive.ZipParser):
mimetypes = {'application/epub+zip', } mimetypes = {'application/epub+zip', }
metadata_namespace = '{http://purl.org/dc/elements/1.1/}' metadata_namespace = '{http://purl.org/dc/elements/1.1/}'
@ -28,7 +29,6 @@ class EPUBParser(archive.ZipParser):
})) }))
self.uniqid = uuid.uuid4() self.uniqid = uuid.uuid4()
def is_archive_valid(self): def is_archive_valid(self):
super().is_archive_valid() super().is_archive_valid()
with zipfile.ZipFile(self.filename) as zin: with zipfile.ZipFile(self.filename) as zin:
@ -37,7 +37,7 @@ class EPUBParser(archive.ZipParser):
if member_name.endswith('META-INF/encryption.xml'): if member_name.endswith('META-INF/encryption.xml'):
raise ValueError('the file contains encrypted fonts') raise ValueError('the file contains encrypted fonts')
def _specific_get_meta(self, full_path, file_path) -> dict[str, Any]: def _specific_get_meta(self, full_path, file_path) -> Dict[str, Any]:
if not file_path.endswith('.opf'): if not file_path.endswith('.opf'):
return {} return {}
@ -73,7 +73,6 @@ class EPUBParser(archive.ZipParser):
short_empty_elements=False) short_empty_elements=False)
return True return True
def __handle_tocncx(self, full_path: str) -> bool: def __handle_tocncx(self, full_path: str) -> bool:
try: try:
tree, namespace = office._parse_xml(full_path) tree, namespace = office._parse_xml(full_path)

View File

@ -4,7 +4,7 @@ import logging
import os import os
import shutil import shutil
import subprocess import subprocess
from typing import Union from typing import Union, Set, Dict
from . import abstract from . import abstract
from . import bubblewrap from . import bubblewrap
@ -15,9 +15,9 @@ class ExiftoolParser(abstract.AbstractParser):
from an import file, hence why several parsers are re-using its `get_meta` from an import file, hence why several parsers are re-using its `get_meta`
method. method.
""" """
meta_allowlist = set() # type: set[str] meta_allowlist = set() # type: Set[str]
def get_meta(self) -> dict[str, Union[str, dict]]: def get_meta(self) -> Dict[str, Union[str, Dict]]:
try: try:
if self.sandbox: if self.sandbox:
out = bubblewrap.run([_get_exiftool_path(), '-json', out = bubblewrap.run([_get_exiftool_path(), '-json',

View File

@ -1,5 +1,5 @@
import shutil import shutil
from typing import Union from typing import Union, Dict
from . import abstract from . import abstract
@ -7,7 +7,7 @@ class HarmlessParser(abstract.AbstractParser):
""" This is the parser for filetypes that can not contain metadata. """ """ This is the parser for filetypes that can not contain metadata. """
mimetypes = {'text/plain', 'image/x-ms-bmp'} mimetypes = {'text/plain', 'image/x-ms-bmp'}
def get_meta(self) -> dict[str, Union[str, dict]]: def get_meta(self) -> Dict[str, Union[str, Dict]]:
return dict() return dict()
def remove_all(self) -> bool: def remove_all(self) -> bool:

View File

@ -1,6 +1,6 @@
import os import os
import re import re
from typing import Union, Any from typing import Union, Any, Dict
import cairo import cairo
@ -48,7 +48,7 @@ class SVGParser(exiftool.ExiftoolParser):
surface.finish() surface.finish()
return True return True
def get_meta(self) -> dict[str, Union[str, dict]]: def get_meta(self) -> Dict[str, Union[str, Dict]]:
meta = super().get_meta() meta = super().get_meta()
# The namespace is mandatory, but only the …/2000/svg is valid. # The namespace is mandatory, but only the …/2000/svg is valid.
@ -57,6 +57,7 @@ class SVGParser(exiftool.ExiftoolParser):
meta.pop('Xmlns') meta.pop('Xmlns')
return meta return meta
class PNGParser(exiftool.ExiftoolParser): class PNGParser(exiftool.ExiftoolParser):
mimetypes = {'image/png', } mimetypes = {'image/png', }
meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
@ -156,11 +157,12 @@ class TiffParser(GdkPixbufAbstractParser):
'FileTypeExtension', 'ImageHeight', 'ImageSize', 'FileTypeExtension', 'ImageHeight', 'ImageSize',
'ImageWidth', 'MIMEType', 'Megapixels', 'SourceFile'} 'ImageWidth', 'MIMEType', 'Megapixels', 'SourceFile'}
class PPMParser(abstract.AbstractParser): class PPMParser(abstract.AbstractParser):
mimetypes = {'image/x-portable-pixmap'} mimetypes = {'image/x-portable-pixmap'}
def get_meta(self) -> dict[str, Union[str, dict]]: def get_meta(self) -> Dict[str, Union[str, Dict]]:
meta = {} # type: dict[str, Union[str, dict[Any, Any]]] meta = {} # type: Dict[str, Union[str, Dict[Any, Any]]]
with open(self.filename) as f: with open(self.filename) as f:
for idx, line in enumerate(f): for idx, line in enumerate(f):
if line.lstrip().startswith('#'): if line.lstrip().startswith('#'):
@ -176,9 +178,10 @@ class PPMParser(abstract.AbstractParser):
fout.write(line) fout.write(line)
return True return True
class HEICParser(exiftool.ExiftoolParser): class HEICParser(exiftool.ExiftoolParser):
mimetypes = {'image/heic'} mimetypes = {'image/heic'}
meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName','Directory', meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
'FileSize', 'FileModifyDate', 'FileAccessDate', 'FileSize', 'FileModifyDate', 'FileAccessDate',
'FileInodeChangeDate', 'FilePermissions', 'FileType', 'FileInodeChangeDate', 'FilePermissions', 'FileType',
'FileTypeExtension', 'MIMEType', 'MajorBrand', 'MinorVersion', 'FileTypeExtension', 'MIMEType', 'MajorBrand', 'MinorVersion',

View File

@ -4,7 +4,7 @@ import logging
import os import os
import re import re
import zipfile import zipfile
from typing import Pattern, Any from typing import Pattern, Any, Tuple, Dict
import xml.etree.ElementTree as ET # type: ignore import xml.etree.ElementTree as ET # type: ignore
@ -12,7 +12,8 @@ from .archive import ZipParser
# pylint: disable=line-too-long # pylint: disable=line-too-long
def _parse_xml(full_path: str) -> tuple[ET.ElementTree, dict[str, str]]:
def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]:
""" This function parses XML, with namespace support. """ """ This function parses XML, with namespace support. """
namespace_map = dict() namespace_map = dict()
for _, (key, value) in ET.iterparse(full_path, ("start-ns", )): for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
@ -68,7 +69,6 @@ class MSOfficeParser(ZipParser):
'application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml', 'application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml',
} }
def __init__(self, filename): def __init__(self, filename):
super().__init__(filename) super().__init__(filename)
@ -218,7 +218,7 @@ class MSOfficeParser(ZipParser):
if 'w' not in namespace: if 'w' not in namespace:
return True return True
parent_map = {c:p for p in tree.iter() for c in p} parent_map = {c: p for p in tree.iter() for c in p}
elements_to_remove = list() elements_to_remove = list()
for element in tree.iterfind('.//w:nsid', namespace): for element in tree.iterfind('.//w:nsid', namespace):
@ -229,7 +229,6 @@ class MSOfficeParser(ZipParser):
tree.write(full_path, xml_declaration=True) tree.write(full_path, xml_declaration=True)
return True return True
@staticmethod @staticmethod
def __remove_revisions(full_path: str) -> bool: def __remove_revisions(full_path: str) -> bool:
try: try:
@ -319,7 +318,6 @@ class MSOfficeParser(ZipParser):
for i in re.findall(r'<p:cNvPr id="([0-9]+)"', content): for i in re.findall(r'<p:cNvPr id="([0-9]+)"', content):
self.__counters['cNvPr'].add(int(i)) self.__counters['cNvPr'].add(int(i))
@staticmethod @staticmethod
def __randomize_creationId(full_path: str) -> bool: def __randomize_creationId(full_path: str) -> bool:
try: try:
@ -441,8 +439,8 @@ class MSOfficeParser(ZipParser):
with open(full_path, encoding='utf-8') as f: with open(full_path, encoding='utf-8') as f:
try: try:
results = re.findall(r"<(.+)>(.+)</\1>", f.read(), re.I|re.M) results = re.findall(r"<(.+)>(.+)</\1>", f.read(), re.I | re.M)
return {k:v for (k, v) in results} return {k: v for (k, v) in results}
except (TypeError, UnicodeDecodeError): except (TypeError, UnicodeDecodeError):
# We didn't manage to parse the xml file # We didn't manage to parse the xml file
return {file_path: 'harmful content', } return {file_path: 'harmful content', }
@ -459,7 +457,6 @@ class LibreOfficeParser(ZipParser):
'application/vnd.oasis.opendocument.image', 'application/vnd.oasis.opendocument.image',
} }
def __init__(self, filename): def __init__(self, filename):
super().__init__(filename) super().__init__(filename)

View File

@ -7,7 +7,7 @@ import re
import logging import logging
import tempfile import tempfile
import io import io
from typing import Union from typing import Union, Dict
import cairo import cairo
import gi import gi
@ -18,6 +18,7 @@ from . import abstract
FIXED_PDF_VERSION = cairo.PDFVersion.VERSION_1_5 FIXED_PDF_VERSION = cairo.PDFVersion.VERSION_1_5
class PDFParser(abstract.AbstractParser): class PDFParser(abstract.AbstractParser):
mimetypes = {'application/pdf', } mimetypes = {'application/pdf', }
meta_list = {'author', 'creation-date', 'creator', 'format', 'keywords', meta_list = {'author', 'creation-date', 'creator', 'format', 'keywords',
@ -140,13 +141,13 @@ class PDFParser(abstract.AbstractParser):
return True return True
@staticmethod @staticmethod
def __parse_metadata_field(data: str) -> dict[str, str]: def __parse_metadata_field(data: str) -> Dict[str, str]:
metadata = {} metadata = {}
for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I): for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I):
metadata[key] = value metadata[key] = value
return metadata return metadata
def get_meta(self) -> dict[str, Union[str, dict]]: def get_meta(self) -> Dict[str, Union[str, Dict]]:
""" Return a dict with all the meta of the file """ Return a dict with all the meta of the file
""" """
metadata = {} metadata = {}

View File

@ -1,5 +1,5 @@
import logging import logging
from typing import Union from typing import Union, Dict, List, Tuple
from . import abstract from . import abstract
@ -15,7 +15,7 @@ class TorrentParser(abstract.AbstractParser):
if self.dict_repr is None: if self.dict_repr is None:
raise ValueError raise ValueError
def get_meta(self) -> dict[str, Union[str, dict]]: def get_meta(self) -> Dict[str, Union[str, Dict]]:
metadata = {} metadata = {}
for key, value in self.dict_repr.items(): for key, value in self.dict_repr.items():
if key not in self.allowlist: if key not in self.allowlist:
@ -56,7 +56,7 @@ class _BencodeHandler:
} }
@staticmethod @staticmethod
def __decode_int(s: bytes) -> tuple[int, bytes]: def __decode_int(s: bytes) -> Tuple[int, bytes]:
s = s[1:] s = s[1:]
next_idx = s.index(b'e') next_idx = s.index(b'e')
if s.startswith(b'-0'): if s.startswith(b'-0'):
@ -66,7 +66,7 @@ class _BencodeHandler:
return int(s[:next_idx]), s[next_idx+1:] return int(s[:next_idx]), s[next_idx+1:]
@staticmethod @staticmethod
def __decode_string(s: bytes) -> tuple[bytes, bytes]: def __decode_string(s: bytes) -> Tuple[bytes, bytes]:
colon = s.index(b':') colon = s.index(b':')
# FIXME Python3 is broken here, the call to `ord` shouldn't be needed, # FIXME Python3 is broken here, the call to `ord` shouldn't be needed,
# but apparently it is. This is utterly idiotic. # but apparently it is. This is utterly idiotic.
@ -76,7 +76,7 @@ class _BencodeHandler:
s = s[1:] s = s[1:]
return s[colon:colon+str_len], s[colon+str_len:] return s[colon:colon+str_len], s[colon+str_len:]
def __decode_list(self, s: bytes) -> tuple[list, bytes]: def __decode_list(self, s: bytes) -> Tuple[List, bytes]:
ret = list() ret = list()
s = s[1:] # skip leading `l` s = s[1:] # skip leading `l`
while s[0] != ord('e'): while s[0] != ord('e'):
@ -84,7 +84,7 @@ class _BencodeHandler:
ret.append(value) ret.append(value)
return ret, s[1:] return ret, s[1:]
def __decode_dict(self, s: bytes) -> tuple[dict, bytes]: def __decode_dict(self, s: bytes) -> Tuple[Dict, bytes]:
ret = dict() ret = dict()
s = s[1:] # skip leading `d` s = s[1:] # skip leading `d`
while s[0] != ord(b'e'): while s[0] != ord(b'e'):
@ -113,10 +113,10 @@ class _BencodeHandler:
ret += self.__encode_func[type(value)](value) ret += self.__encode_func[type(value)](value)
return b'd' + ret + b'e' return b'd' + ret + b'e'
def bencode(self, s: Union[dict, list, bytes, int]) -> bytes: def bencode(self, s: Union[Dict, List, bytes, int]) -> bytes:
return self.__encode_func[type(s)](s) return self.__encode_func[type(s)](s)
def bdecode(self, s: bytes) -> Union[dict, None]: def bdecode(self, s: bytes) -> Union[Dict, None]:
try: try:
ret, trail = self.__decode_func[s[0]](s) ret, trail = self.__decode_func[s[0]](s)
except (IndexError, KeyError, ValueError) as e: except (IndexError, KeyError, ValueError) as e:

View File

@ -3,7 +3,7 @@ import functools
import shutil import shutil
import logging import logging
from typing import Union from typing import Union, Dict
from . import exiftool from . import exiftool
from . import bubblewrap from . import bubblewrap
@ -12,7 +12,7 @@ from . import bubblewrap
class AbstractFFmpegParser(exiftool.ExiftoolParser): class AbstractFFmpegParser(exiftool.ExiftoolParser):
""" Abstract parser for all FFmpeg-based ones, mainly for video. """ """ Abstract parser for all FFmpeg-based ones, mainly for video. """
# Some fileformats have mandatory metadata fields # Some fileformats have mandatory metadata fields
meta_key_value_allowlist = {} # type: dict[str, Union[str, int]] meta_key_value_allowlist = {} # type: Dict[str, Union[str, int]]
def remove_all(self) -> bool: def remove_all(self) -> bool:
if self.meta_key_value_allowlist: if self.meta_key_value_allowlist:
@ -45,10 +45,10 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
return False return False
return True return True
def get_meta(self) -> dict[str, Union[str, dict]]: def get_meta(self) -> Dict[str, Union[str, Dict]]:
meta = super().get_meta() meta = super().get_meta()
ret = dict() # type: dict[str, Union[str, dict]] ret = dict() # type: Dict[str, Union[str, Dict]]
for key, value in meta.items(): for key, value in meta.items():
if key in self.meta_key_value_allowlist: if key in self.meta_key_value_allowlist:
if value == self.meta_key_value_allowlist[key]: if value == self.meta_key_value_allowlist[key]:

View File

@ -1,5 +1,5 @@
from html import parser, escape from html import parser, escape
from typing import Any, Optional from typing import Any, Optional, Dict, List, Tuple, Set
import re import re
import string import string
@ -25,7 +25,7 @@ class CSSParser(abstract.AbstractParser):
f.write(cleaned) f.write(cleaned)
return True return True
def get_meta(self) -> dict[str, Any]: def get_meta(self) -> Dict[str, Any]:
metadata = {} metadata = {}
with open(self.filename, encoding='utf-8') as f: with open(self.filename, encoding='utf-8') as f:
try: try:
@ -44,10 +44,10 @@ class CSSParser(abstract.AbstractParser):
class AbstractHTMLParser(abstract.AbstractParser): class AbstractHTMLParser(abstract.AbstractParser):
tags_blocklist = set() # type: set[str] tags_blocklist = set() # type: Set[str]
# In some html/xml-based formats some tags are mandatory, # In some html/xml-based formats some tags are mandatory,
# so we're keeping them, but are discarding their content # so we're keeping them, but are discarding their content
tags_required_blocklist = set() # type: set[str] tags_required_blocklist = set() # type: Set[str]
def __init__(self, filename): def __init__(self, filename):
super().__init__(filename) super().__init__(filename)
@ -57,7 +57,7 @@ class AbstractHTMLParser(abstract.AbstractParser):
self.__parser.feed(f.read()) self.__parser.feed(f.read())
self.__parser.close() self.__parser.close()
def get_meta(self) -> dict[str, Any]: def get_meta(self) -> Dict[str, Any]:
return self.__parser.get_meta() return self.__parser.get_meta()
def remove_all(self) -> bool: def remove_all(self) -> bool:
@ -112,7 +112,7 @@ class _HTMLParser(parser.HTMLParser):
""" """
raise ValueError(message) raise ValueError(message)
def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]): def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
# Ignore the type, because mypy is too stupid to infer # Ignore the type, because mypy is too stupid to infer
# that get_starttag_text() can't return None. # that get_starttag_text() can't return None.
original_tag = self.get_starttag_text() # type: ignore original_tag = self.get_starttag_text() # type: ignore
@ -159,7 +159,7 @@ class _HTMLParser(parser.HTMLParser):
self.__textrepr += escape(data) self.__textrepr += escape(data)
def handle_startendtag(self, tag: str, def handle_startendtag(self, tag: str,
attrs: list[tuple[str, Optional[str]]]): attrs: List[Tuple[str, Optional[str]]]):
if tag in self.tag_required_blocklist | self.tag_blocklist: if tag in self.tag_required_blocklist | self.tag_blocklist:
meta = {k:v for k, v in attrs} meta = {k:v for k, v in attrs}
name = meta.get('name', 'harmful metadata') name = meta.get('name', 'harmful metadata')
@ -184,7 +184,7 @@ class _HTMLParser(parser.HTMLParser):
f.write(self.__textrepr) f.write(self.__textrepr)
return True return True
def get_meta(self) -> dict[str, Any]: def get_meta(self) -> Dict[str, Any]:
if self.__validation_queue: if self.__validation_queue:
raise ValueError("Some tags (%s) were left unclosed in %s" % ( raise ValueError("Some tags (%s) were left unclosed in %s" % (
', '.join(self.__validation_queue), ', '.join(self.__validation_queue),