Simplify the typing annotations

2025-07-04 20:37:34 +02:00 · 2022-08-28 22:29:06 +02:00 · 2022-08-28 22:29:06 +02:00 · cc5be8608b
commit cc5be8608b
parent 292f44c086
15 changed files with 69 additions and 79 deletions
--- a/libmat2/__init__.py
+++ b/libmat2/__init__.py
@ -2,12 +2,11 @@

 import enum
 import importlib
-from typing import Dict, Optional, Union
+from typing import Optional, Union

 from . import exiftool, video

 # make pyflakes happy
-assert Dict
 assert Optional
 assert Union

@ -67,8 +66,8 @@ CMD_DEPENDENCIES = {
    },
 }

-def check_dependencies() -> Dict[str, Dict[str, bool]]:
-    ret = dict()  # type: Dict[str, dict]
+def check_dependencies() -> dict[str, dict[str, bool]]:
+    ret = dict()  # type: dict[str, dict]

    for key, value in DEPENDENCIES.items():
        ret[key] = {
--- a/libmat2/abstract.py
+++ b/libmat2/abstract.py
@ -1,9 +1,7 @@
 import abc
 import os
 import re
-from typing import Set, Dict, Union
-
-assert Set  # make pyflakes happy
+from typing import Union


 class AbstractParser(abc.ABC):
@ -11,8 +9,8 @@ class AbstractParser(abc.ABC):
    It might yield `ValueError` on instantiation on invalid files,
    and `RuntimeError` when something went wrong in `remove_all`.
    """
-    meta_list = set()  # type: Set[str]
-    mimetypes = set()  # type: Set[str]
+    meta_list = set()  # type: set[str]
+    mimetypes = set()  # type: set[str]

    def __init__(self, filename: str) -> None:
        """
@ -35,7 +33,7 @@ class AbstractParser(abc.ABC):
        self.sandbox = True

    @abc.abstractmethod
-    def get_meta(self) -> Dict[str, Union[str, dict]]:
+    def get_meta(self) -> dict[str, Union[str, dict]]:
        """Return all the metadata of the current file"""

    @abc.abstractmethod
--- a/libmat2/archive.py
+++ b/libmat2/archive.py
@ -7,12 +7,11 @@ import tempfile
 import os
 import logging
 import shutil
-from typing import Dict, Set, Pattern, Union, Any, List
+from typing import Pattern, Union, Any

 from . import abstract, UnknownMemberPolicy, parser_factory

 # Make pyflakes happy
-assert Set
 assert Pattern

 # pylint: disable=not-callable,assignment-from-no-return,too-many-branches
@ -53,11 +52,11 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):

        # Those are the files that have a format that _isn't_
        # supported by mat2, but that we want to keep anyway.
-        self.files_to_keep = set()  # type: Set[Pattern]
+        self.files_to_keep = set()  # type: set[Pattern]

        # Those are the files that we _do not_ want to keep,
        # no matter if they are supported or not.
-        self.files_to_omit = set()  # type: Set[Pattern]
+        self.files_to_omit = set()  # type: set[Pattern]

        # what should the parser do if it encounters an unknown file in
        # the archive?
@ -76,7 +75,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
        # pylint: disable=unused-argument,no-self-use
        return True  # pragma: no cover

-    def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
+    def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
        """ This method can be used to extract specific metadata
        from files present in the archive."""
        # pylint: disable=unused-argument,no-self-use
@ -91,7 +90,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):

    @staticmethod
    @abc.abstractmethod
-    def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
+    def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
        """Return all the members of the archive."""

    @staticmethod
@ -101,7 +100,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):

    @staticmethod
    @abc.abstractmethod
-    def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
+    def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
        """Return all the metadata of a given member."""

    @staticmethod
@ -132,8 +131,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
        # pylint: disable=unused-argument
        return member

-    def get_meta(self) -> Dict[str, Union[str, dict]]:
-        meta = dict()  # type: Dict[str, Union[str, dict]]
+    def get_meta(self) -> dict[str, Union[str, dict]]:
+        meta = dict()  # type: dict[str, Union[str, dict]]

        with self.archive_class(self.filename) as zin:
            temp_folder = tempfile.mkdtemp()
@ -174,7 +173,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):

            # Sort the items to process, to reduce fingerprinting,
            # and keep them in the `items` variable.
-            items = list()  # type: List[ArchiveMember]
+            items = list()  # type: list[ArchiveMember]
            for item in sorted(self._get_all_members(zin), key=self._get_member_name):
                # Some fileformats do require to have the `mimetype` file
                # as the first file in the archive.
@ -340,7 +339,7 @@ class TarParser(ArchiveBasedAbstractParser):
        return member

    @staticmethod
-    def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
+    def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
        assert isinstance(member, tarfile.TarInfo)  # please mypy
        metadata = {}
        if member.mtime != 0:
@ -362,7 +361,7 @@ class TarParser(ArchiveBasedAbstractParser):
        archive.add(full_path, member.name, filter=TarParser._clean_member)  # type: ignore

    @staticmethod
-    def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
+    def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
        assert isinstance(archive, tarfile.TarFile)  # please mypy
        return archive.getmembers()  # type: ignore

@ -416,7 +415,7 @@ class ZipParser(ArchiveBasedAbstractParser):
        return member

    @staticmethod
-    def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
+    def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
        assert isinstance(member, zipfile.ZipInfo)  # please mypy
        metadata = {}
        if member.create_system == 3:  # this is Linux
@ -443,7 +442,7 @@ class ZipParser(ArchiveBasedAbstractParser):
                             compress_type=member.compress_type)

    @staticmethod
-    def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
+    def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
        assert isinstance(archive, zipfile.ZipFile)  # please mypy
        return archive.infolist()  # type: ignore

--- a/libmat2/audio.py
+++ b/libmat2/audio.py
@ -2,7 +2,7 @@ import mimetypes
 import os
 import shutil
 import tempfile
-from typing import Dict, Union
+from typing import Union

 import mutagen

@ -18,7 +18,7 @@ class MutagenParser(abstract.AbstractParser):
        except mutagen.MutagenError:
            raise ValueError

-    def get_meta(self) -> Dict[str, Union[str, dict]]:
+    def get_meta(self) -> dict[str, Union[str, dict]]:
        f = mutagen.File(self.filename)
        if f.tags:
            return {k:', '.join(map(str, v)) for k, v in f.tags.items()}
@ -38,8 +38,8 @@ class MutagenParser(abstract.AbstractParser):
 class MP3Parser(MutagenParser):
    mimetypes = {'audio/mpeg', }

-    def get_meta(self) -> Dict[str, Union[str, dict]]:
-        metadata = {}  # type: Dict[str, Union[str, dict]]
+    def get_meta(self) -> dict[str, Union[str, dict]]:
+        metadata = {}  # type: dict[str, Union[str, dict]]
        meta = mutagen.File(self.filename).tags
        if not meta:
            return metadata
@ -68,7 +68,7 @@ class FLACParser(MutagenParser):
        f.save(deleteid3=True)
        return True

-    def get_meta(self) -> Dict[str, Union[str, dict]]:
+    def get_meta(self) -> dict[str, Union[str, dict]]:
        meta = super().get_meta()
        for num, picture in enumerate(mutagen.File(self.filename).pictures):
            name = picture.desc if picture.desc else 'Cover %d' % num
--- a/libmat2/bubblewrap.py
+++ b/libmat2/bubblewrap.py
@ -12,7 +12,7 @@ import shutil
 import subprocess
 import tempfile
 import functools
-from typing import List, Optional
+from typing import Optional


 __all__ = ['PIPE', 'run', 'CalledProcessError']
@ -33,7 +33,7 @@ def _get_bwrap_path() -> str:

 def _get_bwrap_args(tempdir: str,
                    input_filename: str,
-                    output_filename: Optional[str] = None) -> List[str]:
+                    output_filename: Optional[str] = None) -> list[str]:
    ro_bind_args = []
    cwd = os.getcwd()

@ -78,7 +78,7 @@ def _get_bwrap_args(tempdir: str,
    return args


-def run(args: List[str],
+def run(args: list[str],
        input_filename: str,
        output_filename: Optional[str] = None,
        **kwargs) -> subprocess.CompletedProcess:
--- a/libmat2/epub.py
+++ b/libmat2/epub.py
@ -3,7 +3,7 @@ import re
 import uuid
 import zipfile
 import xml.etree.ElementTree as ET  # type: ignore
-from typing import Dict, Any
+from typing import Any

 from . import archive, office

@ -37,7 +37,7 @@ class EPUBParser(archive.ZipParser):
                if member_name.endswith('META-INF/encryption.xml'):
                    raise ValueError('the file contains encrypted fonts')

-    def _specific_get_meta(self, full_path, file_path) -> Dict[str, Any]:
+    def _specific_get_meta(self, full_path, file_path) -> dict[str, Any]:
        if not file_path.endswith('.opf'):
            return {}

--- a/libmat2/exiftool.py
+++ b/libmat2/exiftool.py
@ -4,23 +4,20 @@ import logging
 import os
 import shutil
 import subprocess
-from typing import Dict, Union, Set
+from typing import Union

 from . import abstract
 from . import bubblewrap

-# Make pyflakes happy
-assert Set
-

 class ExiftoolParser(abstract.AbstractParser):
    """ Exiftool is often the easiest way to get all the metadata
    from a import file, hence why several parsers are re-using its `get_meta`
    method.
    """
-    meta_allowlist = set()  # type: Set[str]
+    meta_allowlist = set()  # type: set[str]

-    def get_meta(self) -> Dict[str, Union[str, dict]]:
+    def get_meta(self) -> dict[str, Union[str, dict]]:
        try:
            if self.sandbox:
                out = bubblewrap.run([_get_exiftool_path(), '-json',
--- a/libmat2/harmless.py
+++ b/libmat2/harmless.py
@ -1,5 +1,5 @@
 import shutil
-from typing import Dict, Union
+from typing import Union
 from . import abstract


@ -7,7 +7,7 @@ class HarmlessParser(abstract.AbstractParser):
    """ This is the parser for filetypes that can not contain metadata. """
    mimetypes = {'text/plain', 'image/x-ms-bmp'}

-    def get_meta(self) -> Dict[str, Union[str, dict]]:
+    def get_meta(self) -> dict[str, Union[str, dict]]:
        return dict()

    def remove_all(self) -> bool:
--- a/libmat2/images.py
+++ b/libmat2/images.py
@ -1,7 +1,7 @@
 import imghdr
 import os
 import re
-from typing import Set, Dict, Union, Any
+from typing import Union, Any

 import cairo

@ -13,7 +13,6 @@ from gi.repository import GdkPixbuf, GLib, Rsvg
 from . import exiftool, abstract

 # Make pyflakes happy
-assert Set
 assert Any

 class SVGParser(exiftool.ExiftoolParser):
@ -50,7 +49,7 @@ class SVGParser(exiftool.ExiftoolParser):
        surface.finish()
        return True

-    def get_meta(self) -> Dict[str, Union[str, dict]]:
+    def get_meta(self) -> dict[str, Union[str, dict]]:
        meta = super().get_meta()

        # The namespace is mandatory, but only the …/2000/svg is valid.
@ -165,8 +164,8 @@ class TiffParser(GdkPixbufAbstractParser):
 class PPMParser(abstract.AbstractParser):
    mimetypes = {'image/x-portable-pixmap'}

-    def get_meta(self) -> Dict[str, Union[str, dict]]:
-        meta = {}  # type: Dict[str, Union[str, Dict[Any, Any]]]
+    def get_meta(self) -> dict[str, Union[str, dict]]:
+        meta = {}  # type: dict[str, Union[str, dict[Any, Any]]]
        with open(self.filename) as f:
            for idx, line in enumerate(f):
                if line.lstrip().startswith('#'):
--- a/libmat2/office.py
+++ b/libmat2/office.py
@ -4,7 +4,7 @@ import logging
 import os
 import re
 import zipfile
-from typing import Dict, Set, Pattern, Tuple, Any
+from typing import Pattern, Any

 import xml.etree.ElementTree as ET  # type: ignore

@ -13,10 +13,9 @@ from .archive import ZipParser
 # pylint: disable=line-too-long

 # Make pyflakes happy
-assert Set
 assert Pattern

-def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]:
+def _parse_xml(full_path: str) -> tuple[ET.ElementTree, dict[str, str]]:
    """ This function parses XML, with namespace support. """
    namespace_map = dict()
    for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
@ -148,7 +147,7 @@ class MSOfficeParser(ZipParser):
                return False
            xml_data = zin.read('[Content_Types].xml')

-        self.content_types = dict()  # type: Dict[str, str]
+        self.content_types = dict()  # type: dict[str, str]
        try:
            tree = ET.fromstring(xml_data)
        except ET.ParseError:
@ -431,7 +430,7 @@ class MSOfficeParser(ZipParser):

        return True

-    def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
+    def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
        """
        Yes, I know that parsing xml with regexp ain't pretty,
        be my guest and fix it if you want.
@ -512,7 +511,7 @@ class LibreOfficeParser(ZipParser):
                return False
        return True

-    def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
+    def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
        """
        Yes, I know that parsing xml with regexp ain't pretty,
        be my guest and fix it if you want.
--- a/libmat2/parser_factory.py
+++ b/libmat2/parser_factory.py
@ -2,7 +2,7 @@ import glob
 import os
 import mimetypes
 import importlib
-from typing import TypeVar, List, Tuple, Optional
+from typing import TypeVar, Optional

 from . import abstract, UNSUPPORTED_EXTENSIONS

@ -34,7 +34,7 @@ def __load_all_parsers():
 __load_all_parsers()


-def _get_parsers() -> List[T]:
+def _get_parsers() -> list[T]:
    """ Get all our parsers!"""
    def __get_parsers(cls):
        return cls.__subclasses__() + \
@ -42,7 +42,7 @@ def _get_parsers() -> List[T]:
    return __get_parsers(abstract.AbstractParser)


-def get_parser(filename: str) -> Tuple[Optional[T], Optional[str]]:
+def get_parser(filename: str) -> tuple[Optional[T], Optional[str]]:
    """ Return the appropriate parser for a given filename.

        :raises ValueError: Raised if the instantiation of the parser went wrong.
--- a/libmat2/pdf.py
+++ b/libmat2/pdf.py
@ -7,7 +7,7 @@ import re
 import logging
 import tempfile
 import io
-from typing import Dict, Union
+from typing import Union
 from distutils.version import LooseVersion

 import cairo
@ -146,13 +146,13 @@ class PDFParser(abstract.AbstractParser):
        return True

    @staticmethod
-    def __parse_metadata_field(data: str) -> Dict[str, str]:
+    def __parse_metadata_field(data: str) -> dict[str, str]:
        metadata = {}
        for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I):
            metadata[key] = value
        return metadata

-    def get_meta(self) -> Dict[str, Union[str, dict]]:
+    def get_meta(self) -> dict[str, Union[str, dict]]:
        """ Return a dict with all the meta of the file
        """
        metadata = {}
--- a/libmat2/torrent.py
+++ b/libmat2/torrent.py
@ -1,5 +1,5 @@
 import logging
-from typing import Union, Tuple, Dict
+from typing import Union

 from . import abstract

@ -15,7 +15,7 @@ class TorrentParser(abstract.AbstractParser):
        if self.dict_repr is None:
            raise ValueError

-    def get_meta(self) -> Dict[str, Union[str, dict]]:
+    def get_meta(self) -> dict[str, Union[str, dict]]:
        metadata = {}
        for key, value in self.dict_repr.items():
            if key not in self.allowlist:
@ -56,7 +56,7 @@ class _BencodeHandler:
        }

    @staticmethod
-    def __decode_int(s: bytes) -> Tuple[int, bytes]:
+    def __decode_int(s: bytes) -> tuple[int, bytes]:
        s = s[1:]
        next_idx = s.index(b'e')
        if s.startswith(b'-0'):
@ -66,7 +66,7 @@ class _BencodeHandler:
        return int(s[:next_idx]), s[next_idx+1:]

    @staticmethod
-    def __decode_string(s: bytes) -> Tuple[bytes, bytes]:
+    def __decode_string(s: bytes) -> tuple[bytes, bytes]:
        colon = s.index(b':')
        # FIXME Python3 is broken here, the call to `ord` shouldn't be needed,
        # but apparently it is. This is utterly idiotic.
@ -76,7 +76,7 @@ class _BencodeHandler:
        s = s[1:]
        return s[colon:colon+str_len], s[colon+str_len:]

-    def __decode_list(self, s: bytes) -> Tuple[list, bytes]:
+    def __decode_list(self, s: bytes) -> tuple[list, bytes]:
        ret = list()
        s = s[1:]  # skip leading `l`
        while s[0] != ord('e'):
@ -84,7 +84,7 @@ class _BencodeHandler:
            ret.append(value)
        return ret, s[1:]

-    def __decode_dict(self, s: bytes) -> Tuple[dict, bytes]:
+    def __decode_dict(self, s: bytes) -> tuple[dict, bytes]:
        ret = dict()
        s = s[1:]  # skip leading `d`
        while s[0] != ord(b'e'):
--- a/libmat2/video.py
+++ b/libmat2/video.py
@ -3,7 +3,7 @@ import functools
 import shutil
 import logging

-from typing import Dict, Union
+from typing import Union

 from . import exiftool
 from . import bubblewrap
@ -12,7 +12,7 @@ from . import bubblewrap
 class AbstractFFmpegParser(exiftool.ExiftoolParser):
    """ Abstract parser for all FFmpeg-based ones, mainly for video. """
    # Some fileformats have mandatory metadata fields
-    meta_key_value_allowlist = {}  # type: Dict[str, Union[str, int]]
+    meta_key_value_allowlist = {}  # type: dict[str, Union[str, int]]

    def remove_all(self) -> bool:
        if self.meta_key_value_allowlist:
@ -45,10 +45,10 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
            return False
        return True

-    def get_meta(self) -> Dict[str, Union[str, dict]]:
+    def get_meta(self) -> dict[str, Union[str, dict]]:
        meta = super().get_meta()

-        ret = dict()  # type: Dict[str, Union[str, dict]]
+        ret = dict()  # type: dict[str, Union[str, dict]]
        for key, value in meta.items():
            if key in self.meta_key_value_allowlist:
                if value == self.meta_key_value_allowlist[key]:
--- a/libmat2/web.py
+++ b/libmat2/web.py
@ -1,11 +1,10 @@
 from html import parser, escape
-from typing import Dict, Any, List, Tuple, Set, Optional
+from typing import  Any, Optional
 import re
 import string

 from . import abstract

-assert Set

 # pylint: disable=too-many-instance-attributes

@ -26,7 +25,7 @@ class CSSParser(abstract.AbstractParser):
            f.write(cleaned)
        return True

-    def get_meta(self) -> Dict[str, Any]:
+    def get_meta(self) -> dict[str, Any]:
        metadata = {}
        with open(self.filename, encoding='utf-8') as f:
            try:
@ -45,10 +44,10 @@ class CSSParser(abstract.AbstractParser):


 class AbstractHTMLParser(abstract.AbstractParser):
-    tags_blocklist = set()  # type: Set[str]
+    tags_blocklist = set()  # type: set[str]
    # In some html/xml-based formats some tags are mandatory,
    # so we're keeping them, but are discarding their content
-    tags_required_blocklist = set()  # type: Set[str]
+    tags_required_blocklist = set()  # type: set[str]

    def __init__(self, filename):
        super().__init__(filename)
@ -58,7 +57,7 @@ class AbstractHTMLParser(abstract.AbstractParser):
            self.__parser.feed(f.read())
        self.__parser.close()

-    def get_meta(self) -> Dict[str, Any]:
+    def get_meta(self) -> dict[str, Any]:
        return self.__parser.get_meta()

    def remove_all(self) -> bool:
@ -92,7 +91,7 @@ class _HTMLParser(parser.HTMLParser):
        self.filename = filename
        self.__textrepr = ''
        self.__meta = {}
-        self.__validation_queue = []  # type: List[str]
+        self.__validation_queue = []  # type: list[str]

        # We're using counters instead of booleans, to handle nested tags
        self.__in_dangerous_but_required_tag = 0
@ -114,7 +113,7 @@ class _HTMLParser(parser.HTMLParser):
        """
        raise ValueError(message)

-    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
+    def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]):
        # Ignore the type, because mypy is too stupid to infer
        # that get_starttag_text() can't return None.
        original_tag = self.get_starttag_text()  # type: ignore
@ -161,7 +160,7 @@ class _HTMLParser(parser.HTMLParser):
                    self.__textrepr += escape(data)

    def handle_startendtag(self, tag: str,
-                           attrs: List[Tuple[str, Optional[str]]]):
+                           attrs: list[tuple[str, Optional[str]]]):
        if tag in self.tag_required_blocklist | self.tag_blocklist:
            meta = {k:v for k, v in attrs}
            name = meta.get('name', 'harmful metadata')
@ -186,7 +185,7 @@ class _HTMLParser(parser.HTMLParser):
            f.write(self.__textrepr)
        return True

-    def get_meta(self) -> Dict[str, Any]:
+    def get_meta(self) -> dict[str, Any]:
        if self.__validation_queue:
            raise ValueError("Some tags (%s) were left unclosed in %s" % (
                ', '.join(self.__validation_queue),