1
0
mirror of synced 2024-11-22 01:04:23 +01:00

Simplify the typing annotations

This commit is contained in:
jvoisin 2022-08-28 22:29:06 +02:00
parent 292f44c086
commit cc5be8608b
15 changed files with 69 additions and 79 deletions

View File

@ -2,12 +2,11 @@
import enum import enum
import importlib import importlib
from typing import Dict, Optional, Union from typing import Optional, Union
from . import exiftool, video from . import exiftool, video
# make pyflakes happy # make pyflakes happy
assert Dict
assert Optional assert Optional
assert Union assert Union
@ -67,8 +66,8 @@ CMD_DEPENDENCIES = {
}, },
} }
def check_dependencies() -> Dict[str, Dict[str, bool]]: def check_dependencies() -> dict[str, dict[str, bool]]:
ret = dict() # type: Dict[str, dict] ret = dict() # type: dict[str, dict]
for key, value in DEPENDENCIES.items(): for key, value in DEPENDENCIES.items():
ret[key] = { ret[key] = {

View File

@ -1,9 +1,7 @@
import abc import abc
import os import os
import re import re
from typing import Set, Dict, Union from typing import Union
assert Set # make pyflakes happy
class AbstractParser(abc.ABC): class AbstractParser(abc.ABC):
@ -11,8 +9,8 @@ class AbstractParser(abc.ABC):
It might yield `ValueError` on instantiation on invalid files, It might yield `ValueError` on instantiation on invalid files,
and `RuntimeError` when something went wrong in `remove_all`. and `RuntimeError` when something went wrong in `remove_all`.
""" """
meta_list = set() # type: Set[str] meta_list = set() # type: set[str]
mimetypes = set() # type: Set[str] mimetypes = set() # type: set[str]
def __init__(self, filename: str) -> None: def __init__(self, filename: str) -> None:
""" """
@ -35,7 +33,7 @@ class AbstractParser(abc.ABC):
self.sandbox = True self.sandbox = True
@abc.abstractmethod @abc.abstractmethod
def get_meta(self) -> Dict[str, Union[str, dict]]: def get_meta(self) -> dict[str, Union[str, dict]]:
"""Return all the metadata of the current file""" """Return all the metadata of the current file"""
@abc.abstractmethod @abc.abstractmethod

View File

@ -7,12 +7,11 @@ import tempfile
import os import os
import logging import logging
import shutil import shutil
from typing import Dict, Set, Pattern, Union, Any, List from typing import Pattern, Union, Any
from . import abstract, UnknownMemberPolicy, parser_factory from . import abstract, UnknownMemberPolicy, parser_factory
# Make pyflakes happy # Make pyflakes happy
assert Set
assert Pattern assert Pattern
# pylint: disable=not-callable,assignment-from-no-return,too-many-branches # pylint: disable=not-callable,assignment-from-no-return,too-many-branches
@ -53,11 +52,11 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# Those are the files that have a format that _isn't_ # Those are the files that have a format that _isn't_
# supported by mat2, but that we want to keep anyway. # supported by mat2, but that we want to keep anyway.
self.files_to_keep = set() # type: Set[Pattern] self.files_to_keep = set() # type: set[Pattern]
# Those are the files that we _do not_ want to keep, # Those are the files that we _do not_ want to keep,
# no matter if they are supported or not. # no matter if they are supported or not.
self.files_to_omit = set() # type: Set[Pattern] self.files_to_omit = set() # type: set[Pattern]
# what should the parser do if it encounters an unknown file in # what should the parser do if it encounters an unknown file in
# the archive? # the archive?
@ -76,7 +75,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# pylint: disable=unused-argument,no-self-use # pylint: disable=unused-argument,no-self-use
return True # pragma: no cover return True # pragma: no cover
def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]: def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
""" This method can be used to extract specific metadata """ This method can be used to extract specific metadata
from files present in the archive.""" from files present in the archive."""
# pylint: disable=unused-argument,no-self-use # pylint: disable=unused-argument,no-self-use
@ -91,7 +90,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
@staticmethod @staticmethod
@abc.abstractmethod @abc.abstractmethod
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]: def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
"""Return all the members of the archive.""" """Return all the members of the archive."""
@staticmethod @staticmethod
@ -101,7 +100,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
@staticmethod @staticmethod
@abc.abstractmethod @abc.abstractmethod
def _get_member_meta(member: ArchiveMember) -> Dict[str, str]: def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
"""Return all the metadata of a given member.""" """Return all the metadata of a given member."""
@staticmethod @staticmethod
@ -132,8 +131,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# pylint: disable=unused-argument # pylint: disable=unused-argument
return member return member
def get_meta(self) -> Dict[str, Union[str, dict]]: def get_meta(self) -> dict[str, Union[str, dict]]:
meta = dict() # type: Dict[str, Union[str, dict]] meta = dict() # type: dict[str, Union[str, dict]]
with self.archive_class(self.filename) as zin: with self.archive_class(self.filename) as zin:
temp_folder = tempfile.mkdtemp() temp_folder = tempfile.mkdtemp()
@ -174,7 +173,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# Sort the items to process, to reduce fingerprinting, # Sort the items to process, to reduce fingerprinting,
# and keep them in the `items` variable. # and keep them in the `items` variable.
items = list() # type: List[ArchiveMember] items = list() # type: list[ArchiveMember]
for item in sorted(self._get_all_members(zin), key=self._get_member_name): for item in sorted(self._get_all_members(zin), key=self._get_member_name):
# Some fileformats do require to have the `mimetype` file # Some fileformats do require to have the `mimetype` file
# as the first file in the archive. # as the first file in the archive.
@ -340,7 +339,7 @@ class TarParser(ArchiveBasedAbstractParser):
return member return member
@staticmethod @staticmethod
def _get_member_meta(member: ArchiveMember) -> Dict[str, str]: def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
assert isinstance(member, tarfile.TarInfo) # please mypy assert isinstance(member, tarfile.TarInfo) # please mypy
metadata = {} metadata = {}
if member.mtime != 0: if member.mtime != 0:
@ -362,7 +361,7 @@ class TarParser(ArchiveBasedAbstractParser):
archive.add(full_path, member.name, filter=TarParser._clean_member) # type: ignore archive.add(full_path, member.name, filter=TarParser._clean_member) # type: ignore
@staticmethod @staticmethod
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]: def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
assert isinstance(archive, tarfile.TarFile) # please mypy assert isinstance(archive, tarfile.TarFile) # please mypy
return archive.getmembers() # type: ignore return archive.getmembers() # type: ignore
@ -416,7 +415,7 @@ class ZipParser(ArchiveBasedAbstractParser):
return member return member
@staticmethod @staticmethod
def _get_member_meta(member: ArchiveMember) -> Dict[str, str]: def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
assert isinstance(member, zipfile.ZipInfo) # please mypy assert isinstance(member, zipfile.ZipInfo) # please mypy
metadata = {} metadata = {}
if member.create_system == 3: # this is Linux if member.create_system == 3: # this is Linux
@ -443,7 +442,7 @@ class ZipParser(ArchiveBasedAbstractParser):
compress_type=member.compress_type) compress_type=member.compress_type)
@staticmethod @staticmethod
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]: def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
assert isinstance(archive, zipfile.ZipFile) # please mypy assert isinstance(archive, zipfile.ZipFile) # please mypy
return archive.infolist() # type: ignore return archive.infolist() # type: ignore

View File

@ -2,7 +2,7 @@ import mimetypes
import os import os
import shutil import shutil
import tempfile import tempfile
from typing import Dict, Union from typing import Union
import mutagen import mutagen
@ -18,7 +18,7 @@ class MutagenParser(abstract.AbstractParser):
except mutagen.MutagenError: except mutagen.MutagenError:
raise ValueError raise ValueError
def get_meta(self) -> Dict[str, Union[str, dict]]: def get_meta(self) -> dict[str, Union[str, dict]]:
f = mutagen.File(self.filename) f = mutagen.File(self.filename)
if f.tags: if f.tags:
return {k:', '.join(map(str, v)) for k, v in f.tags.items()} return {k:', '.join(map(str, v)) for k, v in f.tags.items()}
@ -38,8 +38,8 @@ class MutagenParser(abstract.AbstractParser):
class MP3Parser(MutagenParser): class MP3Parser(MutagenParser):
mimetypes = {'audio/mpeg', } mimetypes = {'audio/mpeg', }
def get_meta(self) -> Dict[str, Union[str, dict]]: def get_meta(self) -> dict[str, Union[str, dict]]:
metadata = {} # type: Dict[str, Union[str, dict]] metadata = {} # type: dict[str, Union[str, dict]]
meta = mutagen.File(self.filename).tags meta = mutagen.File(self.filename).tags
if not meta: if not meta:
return metadata return metadata
@ -68,7 +68,7 @@ class FLACParser(MutagenParser):
f.save(deleteid3=True) f.save(deleteid3=True)
return True return True
def get_meta(self) -> Dict[str, Union[str, dict]]: def get_meta(self) -> dict[str, Union[str, dict]]:
meta = super().get_meta() meta = super().get_meta()
for num, picture in enumerate(mutagen.File(self.filename).pictures): for num, picture in enumerate(mutagen.File(self.filename).pictures):
name = picture.desc if picture.desc else 'Cover %d' % num name = picture.desc if picture.desc else 'Cover %d' % num

View File

@ -12,7 +12,7 @@ import shutil
import subprocess import subprocess
import tempfile import tempfile
import functools import functools
from typing import List, Optional from typing import Optional
__all__ = ['PIPE', 'run', 'CalledProcessError'] __all__ = ['PIPE', 'run', 'CalledProcessError']
@ -33,7 +33,7 @@ def _get_bwrap_path() -> str:
def _get_bwrap_args(tempdir: str, def _get_bwrap_args(tempdir: str,
input_filename: str, input_filename: str,
output_filename: Optional[str] = None) -> List[str]: output_filename: Optional[str] = None) -> list[str]:
ro_bind_args = [] ro_bind_args = []
cwd = os.getcwd() cwd = os.getcwd()
@ -78,7 +78,7 @@ def _get_bwrap_args(tempdir: str,
return args return args
def run(args: List[str], def run(args: list[str],
input_filename: str, input_filename: str,
output_filename: Optional[str] = None, output_filename: Optional[str] = None,
**kwargs) -> subprocess.CompletedProcess: **kwargs) -> subprocess.CompletedProcess:

View File

@ -3,7 +3,7 @@ import re
import uuid import uuid
import zipfile import zipfile
import xml.etree.ElementTree as ET # type: ignore import xml.etree.ElementTree as ET # type: ignore
from typing import Dict, Any from typing import Any
from . import archive, office from . import archive, office
@ -37,7 +37,7 @@ class EPUBParser(archive.ZipParser):
if member_name.endswith('META-INF/encryption.xml'): if member_name.endswith('META-INF/encryption.xml'):
raise ValueError('the file contains encrypted fonts') raise ValueError('the file contains encrypted fonts')
def _specific_get_meta(self, full_path, file_path) -> Dict[str, Any]: def _specific_get_meta(self, full_path, file_path) -> dict[str, Any]:
if not file_path.endswith('.opf'): if not file_path.endswith('.opf'):
return {} return {}

View File

@ -4,23 +4,20 @@ import logging
import os import os
import shutil import shutil
import subprocess import subprocess
from typing import Dict, Union, Set from typing import Union
from . import abstract from . import abstract
from . import bubblewrap from . import bubblewrap
# Make pyflakes happy
assert Set
class ExiftoolParser(abstract.AbstractParser): class ExiftoolParser(abstract.AbstractParser):
""" Exiftool is often the easiest way to get all the metadata """ Exiftool is often the easiest way to get all the metadata
from a import file, hence why several parsers are re-using its `get_meta` from a import file, hence why several parsers are re-using its `get_meta`
method. method.
""" """
meta_allowlist = set() # type: Set[str] meta_allowlist = set() # type: set[str]
def get_meta(self) -> Dict[str, Union[str, dict]]: def get_meta(self) -> dict[str, Union[str, dict]]:
try: try:
if self.sandbox: if self.sandbox:
out = bubblewrap.run([_get_exiftool_path(), '-json', out = bubblewrap.run([_get_exiftool_path(), '-json',

View File

@ -1,5 +1,5 @@
import shutil import shutil
from typing import Dict, Union from typing import Union
from . import abstract from . import abstract
@ -7,7 +7,7 @@ class HarmlessParser(abstract.AbstractParser):
""" This is the parser for filetypes that can not contain metadata. """ """ This is the parser for filetypes that can not contain metadata. """
mimetypes = {'text/plain', 'image/x-ms-bmp'} mimetypes = {'text/plain', 'image/x-ms-bmp'}
def get_meta(self) -> Dict[str, Union[str, dict]]: def get_meta(self) -> dict[str, Union[str, dict]]:
return dict() return dict()
def remove_all(self) -> bool: def remove_all(self) -> bool:

View File

@ -1,7 +1,7 @@
import imghdr import imghdr
import os import os
import re import re
from typing import Set, Dict, Union, Any from typing import Union, Any
import cairo import cairo
@ -13,7 +13,6 @@ from gi.repository import GdkPixbuf, GLib, Rsvg
from . import exiftool, abstract from . import exiftool, abstract
# Make pyflakes happy # Make pyflakes happy
assert Set
assert Any assert Any
class SVGParser(exiftool.ExiftoolParser): class SVGParser(exiftool.ExiftoolParser):
@ -50,7 +49,7 @@ class SVGParser(exiftool.ExiftoolParser):
surface.finish() surface.finish()
return True return True
def get_meta(self) -> Dict[str, Union[str, dict]]: def get_meta(self) -> dict[str, Union[str, dict]]:
meta = super().get_meta() meta = super().get_meta()
# The namespace is mandatory, but only the …/2000/svg is valid. # The namespace is mandatory, but only the …/2000/svg is valid.
@ -165,8 +164,8 @@ class TiffParser(GdkPixbufAbstractParser):
class PPMParser(abstract.AbstractParser): class PPMParser(abstract.AbstractParser):
mimetypes = {'image/x-portable-pixmap'} mimetypes = {'image/x-portable-pixmap'}
def get_meta(self) -> Dict[str, Union[str, dict]]: def get_meta(self) -> dict[str, Union[str, dict]]:
meta = {} # type: Dict[str, Union[str, Dict[Any, Any]]] meta = {} # type: dict[str, Union[str, dict[Any, Any]]]
with open(self.filename) as f: with open(self.filename) as f:
for idx, line in enumerate(f): for idx, line in enumerate(f):
if line.lstrip().startswith('#'): if line.lstrip().startswith('#'):

View File

@ -4,7 +4,7 @@ import logging
import os import os
import re import re
import zipfile import zipfile
from typing import Dict, Set, Pattern, Tuple, Any from typing import Pattern, Any
import xml.etree.ElementTree as ET # type: ignore import xml.etree.ElementTree as ET # type: ignore
@ -13,10 +13,9 @@ from .archive import ZipParser
# pylint: disable=line-too-long # pylint: disable=line-too-long
# Make pyflakes happy # Make pyflakes happy
assert Set
assert Pattern assert Pattern
def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]: def _parse_xml(full_path: str) -> tuple[ET.ElementTree, dict[str, str]]:
""" This function parses XML, with namespace support. """ """ This function parses XML, with namespace support. """
namespace_map = dict() namespace_map = dict()
for _, (key, value) in ET.iterparse(full_path, ("start-ns", )): for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
@ -148,7 +147,7 @@ class MSOfficeParser(ZipParser):
return False return False
xml_data = zin.read('[Content_Types].xml') xml_data = zin.read('[Content_Types].xml')
self.content_types = dict() # type: Dict[str, str] self.content_types = dict() # type: dict[str, str]
try: try:
tree = ET.fromstring(xml_data) tree = ET.fromstring(xml_data)
except ET.ParseError: except ET.ParseError:
@ -431,7 +430,7 @@ class MSOfficeParser(ZipParser):
return True return True
def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]: def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
""" """
Yes, I know that parsing xml with regexp ain't pretty, Yes, I know that parsing xml with regexp ain't pretty,
be my guest and fix it if you want. be my guest and fix it if you want.
@ -512,7 +511,7 @@ class LibreOfficeParser(ZipParser):
return False return False
return True return True
def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]: def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
""" """
Yes, I know that parsing xml with regexp ain't pretty, Yes, I know that parsing xml with regexp ain't pretty,
be my guest and fix it if you want. be my guest and fix it if you want.

View File

@ -2,7 +2,7 @@ import glob
import os import os
import mimetypes import mimetypes
import importlib import importlib
from typing import TypeVar, List, Tuple, Optional from typing import TypeVar, Optional
from . import abstract, UNSUPPORTED_EXTENSIONS from . import abstract, UNSUPPORTED_EXTENSIONS
@ -34,7 +34,7 @@ def __load_all_parsers():
__load_all_parsers() __load_all_parsers()
def _get_parsers() -> List[T]: def _get_parsers() -> list[T]:
""" Get all our parsers!""" """ Get all our parsers!"""
def __get_parsers(cls): def __get_parsers(cls):
return cls.__subclasses__() + \ return cls.__subclasses__() + \
@ -42,7 +42,7 @@ def _get_parsers() -> List[T]:
return __get_parsers(abstract.AbstractParser) return __get_parsers(abstract.AbstractParser)
def get_parser(filename: str) -> Tuple[Optional[T], Optional[str]]: def get_parser(filename: str) -> tuple[Optional[T], Optional[str]]:
""" Return the appropriate parser for a given filename. """ Return the appropriate parser for a given filename.
:raises ValueError: Raised if the instantiation of the parser went wrong. :raises ValueError: Raised if the instantiation of the parser went wrong.

View File

@ -7,7 +7,7 @@ import re
import logging import logging
import tempfile import tempfile
import io import io
from typing import Dict, Union from typing import Union
from distutils.version import LooseVersion from distutils.version import LooseVersion
import cairo import cairo
@ -146,13 +146,13 @@ class PDFParser(abstract.AbstractParser):
return True return True
@staticmethod @staticmethod
def __parse_metadata_field(data: str) -> Dict[str, str]: def __parse_metadata_field(data: str) -> dict[str, str]:
metadata = {} metadata = {}
for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I): for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I):
metadata[key] = value metadata[key] = value
return metadata return metadata
def get_meta(self) -> Dict[str, Union[str, dict]]: def get_meta(self) -> dict[str, Union[str, dict]]:
""" Return a dict with all the meta of the file """ Return a dict with all the meta of the file
""" """
metadata = {} metadata = {}

View File

@ -1,5 +1,5 @@
import logging import logging
from typing import Union, Tuple, Dict from typing import Union
from . import abstract from . import abstract
@ -15,7 +15,7 @@ class TorrentParser(abstract.AbstractParser):
if self.dict_repr is None: if self.dict_repr is None:
raise ValueError raise ValueError
def get_meta(self) -> Dict[str, Union[str, dict]]: def get_meta(self) -> dict[str, Union[str, dict]]:
metadata = {} metadata = {}
for key, value in self.dict_repr.items(): for key, value in self.dict_repr.items():
if key not in self.allowlist: if key not in self.allowlist:
@ -56,7 +56,7 @@ class _BencodeHandler:
} }
@staticmethod @staticmethod
def __decode_int(s: bytes) -> Tuple[int, bytes]: def __decode_int(s: bytes) -> tuple[int, bytes]:
s = s[1:] s = s[1:]
next_idx = s.index(b'e') next_idx = s.index(b'e')
if s.startswith(b'-0'): if s.startswith(b'-0'):
@ -66,7 +66,7 @@ class _BencodeHandler:
return int(s[:next_idx]), s[next_idx+1:] return int(s[:next_idx]), s[next_idx+1:]
@staticmethod @staticmethod
def __decode_string(s: bytes) -> Tuple[bytes, bytes]: def __decode_string(s: bytes) -> tuple[bytes, bytes]:
colon = s.index(b':') colon = s.index(b':')
# FIXME Python3 is broken here, the call to `ord` shouldn't be needed, # FIXME Python3 is broken here, the call to `ord` shouldn't be needed,
# but apparently it is. This is utterly idiotic. # but apparently it is. This is utterly idiotic.
@ -76,7 +76,7 @@ class _BencodeHandler:
s = s[1:] s = s[1:]
return s[colon:colon+str_len], s[colon+str_len:] return s[colon:colon+str_len], s[colon+str_len:]
def __decode_list(self, s: bytes) -> Tuple[list, bytes]: def __decode_list(self, s: bytes) -> tuple[list, bytes]:
ret = list() ret = list()
s = s[1:] # skip leading `l` s = s[1:] # skip leading `l`
while s[0] != ord('e'): while s[0] != ord('e'):
@ -84,7 +84,7 @@ class _BencodeHandler:
ret.append(value) ret.append(value)
return ret, s[1:] return ret, s[1:]
def __decode_dict(self, s: bytes) -> Tuple[dict, bytes]: def __decode_dict(self, s: bytes) -> tuple[dict, bytes]:
ret = dict() ret = dict()
s = s[1:] # skip leading `d` s = s[1:] # skip leading `d`
while s[0] != ord(b'e'): while s[0] != ord(b'e'):

View File

@ -3,7 +3,7 @@ import functools
import shutil import shutil
import logging import logging
from typing import Dict, Union from typing import Union
from . import exiftool from . import exiftool
from . import bubblewrap from . import bubblewrap
@ -12,7 +12,7 @@ from . import bubblewrap
class AbstractFFmpegParser(exiftool.ExiftoolParser): class AbstractFFmpegParser(exiftool.ExiftoolParser):
""" Abstract parser for all FFmpeg-based ones, mainly for video. """ """ Abstract parser for all FFmpeg-based ones, mainly for video. """
# Some fileformats have mandatory metadata fields # Some fileformats have mandatory metadata fields
meta_key_value_allowlist = {} # type: Dict[str, Union[str, int]] meta_key_value_allowlist = {} # type: dict[str, Union[str, int]]
def remove_all(self) -> bool: def remove_all(self) -> bool:
if self.meta_key_value_allowlist: if self.meta_key_value_allowlist:
@ -45,10 +45,10 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
return False return False
return True return True
def get_meta(self) -> Dict[str, Union[str, dict]]: def get_meta(self) -> dict[str, Union[str, dict]]:
meta = super().get_meta() meta = super().get_meta()
ret = dict() # type: Dict[str, Union[str, dict]] ret = dict() # type: dict[str, Union[str, dict]]
for key, value in meta.items(): for key, value in meta.items():
if key in self.meta_key_value_allowlist: if key in self.meta_key_value_allowlist:
if value == self.meta_key_value_allowlist[key]: if value == self.meta_key_value_allowlist[key]:

View File

@ -1,11 +1,10 @@
from html import parser, escape from html import parser, escape
from typing import Dict, Any, List, Tuple, Set, Optional from typing import Any, Optional
import re import re
import string import string
from . import abstract from . import abstract
assert Set
# pylint: disable=too-many-instance-attributes # pylint: disable=too-many-instance-attributes
@ -26,7 +25,7 @@ class CSSParser(abstract.AbstractParser):
f.write(cleaned) f.write(cleaned)
return True return True
def get_meta(self) -> Dict[str, Any]: def get_meta(self) -> dict[str, Any]:
metadata = {} metadata = {}
with open(self.filename, encoding='utf-8') as f: with open(self.filename, encoding='utf-8') as f:
try: try:
@ -45,10 +44,10 @@ class CSSParser(abstract.AbstractParser):
class AbstractHTMLParser(abstract.AbstractParser): class AbstractHTMLParser(abstract.AbstractParser):
tags_blocklist = set() # type: Set[str] tags_blocklist = set() # type: set[str]
# In some html/xml-based formats some tags are mandatory, # In some html/xml-based formats some tags are mandatory,
# so we're keeping them, but are discarding their content # so we're keeping them, but are discarding their content
tags_required_blocklist = set() # type: Set[str] tags_required_blocklist = set() # type: set[str]
def __init__(self, filename): def __init__(self, filename):
super().__init__(filename) super().__init__(filename)
@ -58,7 +57,7 @@ class AbstractHTMLParser(abstract.AbstractParser):
self.__parser.feed(f.read()) self.__parser.feed(f.read())
self.__parser.close() self.__parser.close()
def get_meta(self) -> Dict[str, Any]: def get_meta(self) -> dict[str, Any]:
return self.__parser.get_meta() return self.__parser.get_meta()
def remove_all(self) -> bool: def remove_all(self) -> bool:
@ -92,7 +91,7 @@ class _HTMLParser(parser.HTMLParser):
self.filename = filename self.filename = filename
self.__textrepr = '' self.__textrepr = ''
self.__meta = {} self.__meta = {}
self.__validation_queue = [] # type: List[str] self.__validation_queue = [] # type: list[str]
# We're using counters instead of booleans, to handle nested tags # We're using counters instead of booleans, to handle nested tags
self.__in_dangerous_but_required_tag = 0 self.__in_dangerous_but_required_tag = 0
@ -114,7 +113,7 @@ class _HTMLParser(parser.HTMLParser):
""" """
raise ValueError(message) raise ValueError(message)
def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]): def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]):
# Ignore the type, because mypy is too stupid to infer # Ignore the type, because mypy is too stupid to infer
# that get_starttag_text() can't return None. # that get_starttag_text() can't return None.
original_tag = self.get_starttag_text() # type: ignore original_tag = self.get_starttag_text() # type: ignore
@ -161,7 +160,7 @@ class _HTMLParser(parser.HTMLParser):
self.__textrepr += escape(data) self.__textrepr += escape(data)
def handle_startendtag(self, tag: str, def handle_startendtag(self, tag: str,
attrs: List[Tuple[str, Optional[str]]]): attrs: list[tuple[str, Optional[str]]]):
if tag in self.tag_required_blocklist | self.tag_blocklist: if tag in self.tag_required_blocklist | self.tag_blocklist:
meta = {k:v for k, v in attrs} meta = {k:v for k, v in attrs}
name = meta.get('name', 'harmful metadata') name = meta.get('name', 'harmful metadata')
@ -186,7 +185,7 @@ class _HTMLParser(parser.HTMLParser):
f.write(self.__textrepr) f.write(self.__textrepr)
return True return True
def get_meta(self) -> Dict[str, Any]: def get_meta(self) -> dict[str, Any]:
if self.__validation_queue: if self.__validation_queue:
raise ValueError("Some tags (%s) were left unclosed in %s" % ( raise ValueError("Some tags (%s) were left unclosed in %s" % (
', '.join(self.__validation_queue), ', '.join(self.__validation_queue),