Simplify the typing annotations
This commit is contained in:
parent
292f44c086
commit
cc5be8608b
@ -2,12 +2,11 @@
|
||||
|
||||
import enum
|
||||
import importlib
|
||||
from typing import Dict, Optional, Union
|
||||
from typing import Optional, Union
|
||||
|
||||
from . import exiftool, video
|
||||
|
||||
# make pyflakes happy
|
||||
assert Dict
|
||||
assert Optional
|
||||
assert Union
|
||||
|
||||
@ -67,8 +66,8 @@ CMD_DEPENDENCIES = {
|
||||
},
|
||||
}
|
||||
|
||||
def check_dependencies() -> Dict[str, Dict[str, bool]]:
|
||||
ret = dict() # type: Dict[str, dict]
|
||||
def check_dependencies() -> dict[str, dict[str, bool]]:
|
||||
ret = dict() # type: dict[str, dict]
|
||||
|
||||
for key, value in DEPENDENCIES.items():
|
||||
ret[key] = {
|
||||
|
@ -1,9 +1,7 @@
|
||||
import abc
|
||||
import os
|
||||
import re
|
||||
from typing import Set, Dict, Union
|
||||
|
||||
assert Set # make pyflakes happy
|
||||
from typing import Union
|
||||
|
||||
|
||||
class AbstractParser(abc.ABC):
|
||||
@ -11,8 +9,8 @@ class AbstractParser(abc.ABC):
|
||||
It might raise `ValueError` on instantiation for invalid files,
|
||||
and `RuntimeError` when something went wrong in `remove_all`.
|
||||
"""
|
||||
meta_list = set() # type: Set[str]
|
||||
mimetypes = set() # type: Set[str]
|
||||
meta_list = set() # type: set[str]
|
||||
mimetypes = set() # type: set[str]
|
||||
|
||||
def __init__(self, filename: str) -> None:
|
||||
"""
|
||||
@ -35,7 +33,7 @@ class AbstractParser(abc.ABC):
|
||||
self.sandbox = True
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_meta(self) -> Dict[str, Union[str, dict]]:
|
||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
||||
"""Return all the metadata of the current file"""
|
||||
|
||||
@abc.abstractmethod
|
||||
|
@ -7,12 +7,11 @@ import tempfile
|
||||
import os
|
||||
import logging
|
||||
import shutil
|
||||
from typing import Dict, Set, Pattern, Union, Any, List
|
||||
from typing import Pattern, Union, Any
|
||||
|
||||
from . import abstract, UnknownMemberPolicy, parser_factory
|
||||
|
||||
# Make pyflakes happy
|
||||
assert Set
|
||||
assert Pattern
|
||||
|
||||
# pylint: disable=not-callable,assignment-from-no-return,too-many-branches
|
||||
@ -53,11 +52,11 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
||||
|
||||
# Those are the files that have a format that _isn't_
|
||||
# supported by mat2, but that we want to keep anyway.
|
||||
self.files_to_keep = set() # type: Set[Pattern]
|
||||
self.files_to_keep = set() # type: set[Pattern]
|
||||
|
||||
# Those are the files that we _do not_ want to keep,
|
||||
# no matter if they are supported or not.
|
||||
self.files_to_omit = set() # type: Set[Pattern]
|
||||
self.files_to_omit = set() # type: set[Pattern]
|
||||
|
||||
# what should the parser do if it encounters an unknown file in
|
||||
# the archive?
|
||||
@ -76,7 +75,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
||||
# pylint: disable=unused-argument,no-self-use
|
||||
return True # pragma: no cover
|
||||
|
||||
def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
|
||||
def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
|
||||
""" This method can be used to extract specific metadata
|
||||
from files present in the archive."""
|
||||
# pylint: disable=unused-argument,no-self-use
|
||||
@ -91,7 +90,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
||||
|
||||
@staticmethod
|
||||
@abc.abstractmethod
|
||||
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
|
||||
def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
|
||||
"""Return all the members of the archive."""
|
||||
|
||||
@staticmethod
|
||||
@ -101,7 +100,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
||||
|
||||
@staticmethod
|
||||
@abc.abstractmethod
|
||||
def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
|
||||
def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
|
||||
"""Return all the metadata of a given member."""
|
||||
|
||||
@staticmethod
|
||||
@ -132,8 +131,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
||||
# pylint: disable=unused-argument
|
||||
return member
|
||||
|
||||
def get_meta(self) -> Dict[str, Union[str, dict]]:
|
||||
meta = dict() # type: Dict[str, Union[str, dict]]
|
||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
||||
meta = dict() # type: dict[str, Union[str, dict]]
|
||||
|
||||
with self.archive_class(self.filename) as zin:
|
||||
temp_folder = tempfile.mkdtemp()
|
||||
@ -174,7 +173,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
||||
|
||||
# Sort the items to process, to reduce fingerprinting,
|
||||
# and keep them in the `items` variable.
|
||||
items = list() # type: List[ArchiveMember]
|
||||
items = list() # type: list[ArchiveMember]
|
||||
for item in sorted(self._get_all_members(zin), key=self._get_member_name):
|
||||
# Some file formats require the `mimetype` file to be stored
|
||||
# as the first file in the archive.
|
||||
@ -340,7 +339,7 @@ class TarParser(ArchiveBasedAbstractParser):
|
||||
return member
|
||||
|
||||
@staticmethod
|
||||
def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
|
||||
def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
|
||||
assert isinstance(member, tarfile.TarInfo) # please mypy
|
||||
metadata = {}
|
||||
if member.mtime != 0:
|
||||
@ -362,7 +361,7 @@ class TarParser(ArchiveBasedAbstractParser):
|
||||
archive.add(full_path, member.name, filter=TarParser._clean_member) # type: ignore
|
||||
|
||||
@staticmethod
|
||||
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
|
||||
def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
|
||||
assert isinstance(archive, tarfile.TarFile) # please mypy
|
||||
return archive.getmembers() # type: ignore
|
||||
|
||||
@ -416,7 +415,7 @@ class ZipParser(ArchiveBasedAbstractParser):
|
||||
return member
|
||||
|
||||
@staticmethod
|
||||
def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
|
||||
def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
|
||||
assert isinstance(member, zipfile.ZipInfo) # please mypy
|
||||
metadata = {}
|
||||
if member.create_system == 3: # this is Linux
|
||||
@ -443,7 +442,7 @@ class ZipParser(ArchiveBasedAbstractParser):
|
||||
compress_type=member.compress_type)
|
||||
|
||||
@staticmethod
|
||||
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
|
||||
def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
|
||||
assert isinstance(archive, zipfile.ZipFile) # please mypy
|
||||
return archive.infolist() # type: ignore
|
||||
|
||||
|
@ -2,7 +2,7 @@ import mimetypes
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
from typing import Dict, Union
|
||||
from typing import Union
|
||||
|
||||
import mutagen
|
||||
|
||||
@ -18,7 +18,7 @@ class MutagenParser(abstract.AbstractParser):
|
||||
except mutagen.MutagenError:
|
||||
raise ValueError
|
||||
|
||||
def get_meta(self) -> Dict[str, Union[str, dict]]:
|
||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
||||
f = mutagen.File(self.filename)
|
||||
if f.tags:
|
||||
return {k:', '.join(map(str, v)) for k, v in f.tags.items()}
|
||||
@ -38,8 +38,8 @@ class MutagenParser(abstract.AbstractParser):
|
||||
class MP3Parser(MutagenParser):
|
||||
mimetypes = {'audio/mpeg', }
|
||||
|
||||
def get_meta(self) -> Dict[str, Union[str, dict]]:
|
||||
metadata = {} # type: Dict[str, Union[str, dict]]
|
||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
||||
metadata = {} # type: dict[str, Union[str, dict]]
|
||||
meta = mutagen.File(self.filename).tags
|
||||
if not meta:
|
||||
return metadata
|
||||
@ -68,7 +68,7 @@ class FLACParser(MutagenParser):
|
||||
f.save(deleteid3=True)
|
||||
return True
|
||||
|
||||
def get_meta(self) -> Dict[str, Union[str, dict]]:
|
||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
||||
meta = super().get_meta()
|
||||
for num, picture in enumerate(mutagen.File(self.filename).pictures):
|
||||
name = picture.desc if picture.desc else 'Cover %d' % num
|
||||
|
@ -12,7 +12,7 @@ import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import functools
|
||||
from typing import List, Optional
|
||||
from typing import Optional
|
||||
|
||||
|
||||
__all__ = ['PIPE', 'run', 'CalledProcessError']
|
||||
@ -33,7 +33,7 @@ def _get_bwrap_path() -> str:
|
||||
|
||||
def _get_bwrap_args(tempdir: str,
|
||||
input_filename: str,
|
||||
output_filename: Optional[str] = None) -> List[str]:
|
||||
output_filename: Optional[str] = None) -> list[str]:
|
||||
ro_bind_args = []
|
||||
cwd = os.getcwd()
|
||||
|
||||
@ -78,7 +78,7 @@ def _get_bwrap_args(tempdir: str,
|
||||
return args
|
||||
|
||||
|
||||
def run(args: List[str],
|
||||
def run(args: list[str],
|
||||
input_filename: str,
|
||||
output_filename: Optional[str] = None,
|
||||
**kwargs) -> subprocess.CompletedProcess:
|
||||
|
@ -3,7 +3,7 @@ import re
|
||||
import uuid
|
||||
import zipfile
|
||||
import xml.etree.ElementTree as ET # type: ignore
|
||||
from typing import Dict, Any
|
||||
from typing import Any
|
||||
|
||||
from . import archive, office
|
||||
|
||||
@ -37,7 +37,7 @@ class EPUBParser(archive.ZipParser):
|
||||
if member_name.endswith('META-INF/encryption.xml'):
|
||||
raise ValueError('the file contains encrypted fonts')
|
||||
|
||||
def _specific_get_meta(self, full_path, file_path) -> Dict[str, Any]:
|
||||
def _specific_get_meta(self, full_path, file_path) -> dict[str, Any]:
|
||||
if not file_path.endswith('.opf'):
|
||||
return {}
|
||||
|
||||
|
@ -4,23 +4,20 @@ import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from typing import Dict, Union, Set
|
||||
from typing import Union
|
||||
|
||||
from . import abstract
|
||||
from . import bubblewrap
|
||||
|
||||
# Make pyflakes happy
|
||||
assert Set
|
||||
|
||||
|
||||
class ExiftoolParser(abstract.AbstractParser):
|
||||
""" Exiftool is often the easiest way to get all the metadata
|
||||
from a file, which is why several parsers reuse its `get_meta`
|
||||
method.
|
||||
"""
|
||||
meta_allowlist = set() # type: Set[str]
|
||||
meta_allowlist = set() # type: set[str]
|
||||
|
||||
def get_meta(self) -> Dict[str, Union[str, dict]]:
|
||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
||||
try:
|
||||
if self.sandbox:
|
||||
out = bubblewrap.run([_get_exiftool_path(), '-json',
|
||||
|
@ -1,5 +1,5 @@
|
||||
import shutil
|
||||
from typing import Dict, Union
|
||||
from typing import Union
|
||||
from . import abstract
|
||||
|
||||
|
||||
@ -7,7 +7,7 @@ class HarmlessParser(abstract.AbstractParser):
|
||||
""" This is the parser for filetypes that can not contain metadata. """
|
||||
mimetypes = {'text/plain', 'image/x-ms-bmp'}
|
||||
|
||||
def get_meta(self) -> Dict[str, Union[str, dict]]:
|
||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
||||
return dict()
|
||||
|
||||
def remove_all(self) -> bool:
|
||||
|
@ -1,7 +1,7 @@
|
||||
import imghdr
|
||||
import os
|
||||
import re
|
||||
from typing import Set, Dict, Union, Any
|
||||
from typing import Union, Any
|
||||
|
||||
import cairo
|
||||
|
||||
@ -13,7 +13,6 @@ from gi.repository import GdkPixbuf, GLib, Rsvg
|
||||
from . import exiftool, abstract
|
||||
|
||||
# Make pyflakes happy
|
||||
assert Set
|
||||
assert Any
|
||||
|
||||
class SVGParser(exiftool.ExiftoolParser):
|
||||
@ -50,7 +49,7 @@ class SVGParser(exiftool.ExiftoolParser):
|
||||
surface.finish()
|
||||
return True
|
||||
|
||||
def get_meta(self) -> Dict[str, Union[str, dict]]:
|
||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
||||
meta = super().get_meta()
|
||||
|
||||
# The namespace is mandatory, but only the …/2000/svg is valid.
|
||||
@ -165,8 +164,8 @@ class TiffParser(GdkPixbufAbstractParser):
|
||||
class PPMParser(abstract.AbstractParser):
|
||||
mimetypes = {'image/x-portable-pixmap'}
|
||||
|
||||
def get_meta(self) -> Dict[str, Union[str, dict]]:
|
||||
meta = {} # type: Dict[str, Union[str, Dict[Any, Any]]]
|
||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
||||
meta = {} # type: dict[str, Union[str, dict[Any, Any]]]
|
||||
with open(self.filename) as f:
|
||||
for idx, line in enumerate(f):
|
||||
if line.lstrip().startswith('#'):
|
||||
|
@ -4,7 +4,7 @@ import logging
|
||||
import os
|
||||
import re
|
||||
import zipfile
|
||||
from typing import Dict, Set, Pattern, Tuple, Any
|
||||
from typing import Pattern, Any
|
||||
|
||||
import xml.etree.ElementTree as ET # type: ignore
|
||||
|
||||
@ -13,10 +13,9 @@ from .archive import ZipParser
|
||||
# pylint: disable=line-too-long
|
||||
|
||||
# Make pyflakes happy
|
||||
assert Set
|
||||
assert Pattern
|
||||
|
||||
def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]:
|
||||
def _parse_xml(full_path: str) -> tuple[ET.ElementTree, dict[str, str]]:
|
||||
""" This function parses XML, with namespace support. """
|
||||
namespace_map = dict()
|
||||
for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
|
||||
@ -148,7 +147,7 @@ class MSOfficeParser(ZipParser):
|
||||
return False
|
||||
xml_data = zin.read('[Content_Types].xml')
|
||||
|
||||
self.content_types = dict() # type: Dict[str, str]
|
||||
self.content_types = dict() # type: dict[str, str]
|
||||
try:
|
||||
tree = ET.fromstring(xml_data)
|
||||
except ET.ParseError:
|
||||
@ -431,7 +430,7 @@ class MSOfficeParser(ZipParser):
|
||||
|
||||
return True
|
||||
|
||||
def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
|
||||
def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
|
||||
"""
|
||||
Yes, I know that parsing xml with regexp ain't pretty,
|
||||
be my guest and fix it if you want.
|
||||
@ -512,7 +511,7 @@ class LibreOfficeParser(ZipParser):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
|
||||
def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
|
||||
"""
|
||||
Yes, I know that parsing xml with regexp ain't pretty,
|
||||
be my guest and fix it if you want.
|
||||
|
@ -2,7 +2,7 @@ import glob
|
||||
import os
|
||||
import mimetypes
|
||||
import importlib
|
||||
from typing import TypeVar, List, Tuple, Optional
|
||||
from typing import TypeVar, Optional
|
||||
|
||||
from . import abstract, UNSUPPORTED_EXTENSIONS
|
||||
|
||||
@ -34,7 +34,7 @@ def __load_all_parsers():
|
||||
__load_all_parsers()
|
||||
|
||||
|
||||
def _get_parsers() -> List[T]:
|
||||
def _get_parsers() -> list[T]:
|
||||
""" Get all our parsers!"""
|
||||
def __get_parsers(cls):
|
||||
return cls.__subclasses__() + \
|
||||
@ -42,7 +42,7 @@ def _get_parsers() -> List[T]:
|
||||
return __get_parsers(abstract.AbstractParser)
|
||||
|
||||
|
||||
def get_parser(filename: str) -> Tuple[Optional[T], Optional[str]]:
|
||||
def get_parser(filename: str) -> tuple[Optional[T], Optional[str]]:
|
||||
""" Return the appropriate parser for a given filename.
|
||||
|
||||
:raises ValueError: Raised if the instantiation of the parser went wrong.
|
||||
|
@ -7,7 +7,7 @@ import re
|
||||
import logging
|
||||
import tempfile
|
||||
import io
|
||||
from typing import Dict, Union
|
||||
from typing import Union
|
||||
from distutils.version import LooseVersion
|
||||
|
||||
import cairo
|
||||
@ -146,13 +146,13 @@ class PDFParser(abstract.AbstractParser):
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def __parse_metadata_field(data: str) -> Dict[str, str]:
|
||||
def __parse_metadata_field(data: str) -> dict[str, str]:
|
||||
metadata = {}
|
||||
for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I):
|
||||
metadata[key] = value
|
||||
return metadata
|
||||
|
||||
def get_meta(self) -> Dict[str, Union[str, dict]]:
|
||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
||||
""" Return a dict with all the meta of the file
|
||||
"""
|
||||
metadata = {}
|
||||
|
@ -1,5 +1,5 @@
|
||||
import logging
|
||||
from typing import Union, Tuple, Dict
|
||||
from typing import Union
|
||||
|
||||
from . import abstract
|
||||
|
||||
@ -15,7 +15,7 @@ class TorrentParser(abstract.AbstractParser):
|
||||
if self.dict_repr is None:
|
||||
raise ValueError
|
||||
|
||||
def get_meta(self) -> Dict[str, Union[str, dict]]:
|
||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
||||
metadata = {}
|
||||
for key, value in self.dict_repr.items():
|
||||
if key not in self.allowlist:
|
||||
@ -56,7 +56,7 @@ class _BencodeHandler:
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def __decode_int(s: bytes) -> Tuple[int, bytes]:
|
||||
def __decode_int(s: bytes) -> tuple[int, bytes]:
|
||||
s = s[1:]
|
||||
next_idx = s.index(b'e')
|
||||
if s.startswith(b'-0'):
|
||||
@ -66,7 +66,7 @@ class _BencodeHandler:
|
||||
return int(s[:next_idx]), s[next_idx+1:]
|
||||
|
||||
@staticmethod
|
||||
def __decode_string(s: bytes) -> Tuple[bytes, bytes]:
|
||||
def __decode_string(s: bytes) -> tuple[bytes, bytes]:
|
||||
colon = s.index(b':')
|
||||
# FIXME Python3 is broken here, the call to `ord` shouldn't be needed,
|
||||
# but apparently it is. This is utterly idiotic.
|
||||
@ -76,7 +76,7 @@ class _BencodeHandler:
|
||||
s = s[1:]
|
||||
return s[colon:colon+str_len], s[colon+str_len:]
|
||||
|
||||
def __decode_list(self, s: bytes) -> Tuple[list, bytes]:
|
||||
def __decode_list(self, s: bytes) -> tuple[list, bytes]:
|
||||
ret = list()
|
||||
s = s[1:] # skip leading `l`
|
||||
while s[0] != ord('e'):
|
||||
@ -84,7 +84,7 @@ class _BencodeHandler:
|
||||
ret.append(value)
|
||||
return ret, s[1:]
|
||||
|
||||
def __decode_dict(self, s: bytes) -> Tuple[dict, bytes]:
|
||||
def __decode_dict(self, s: bytes) -> tuple[dict, bytes]:
|
||||
ret = dict()
|
||||
s = s[1:] # skip leading `d`
|
||||
while s[0] != ord(b'e'):
|
||||
|
@ -3,7 +3,7 @@ import functools
|
||||
import shutil
|
||||
import logging
|
||||
|
||||
from typing import Dict, Union
|
||||
from typing import Union
|
||||
|
||||
from . import exiftool
|
||||
from . import bubblewrap
|
||||
@ -12,7 +12,7 @@ from . import bubblewrap
|
||||
class AbstractFFmpegParser(exiftool.ExiftoolParser):
|
||||
""" Abstract parser for all FFmpeg-based ones, mainly for video. """
|
||||
# Some file formats have mandatory metadata fields
|
||||
meta_key_value_allowlist = {} # type: Dict[str, Union[str, int]]
|
||||
meta_key_value_allowlist = {} # type: dict[str, Union[str, int]]
|
||||
|
||||
def remove_all(self) -> bool:
|
||||
if self.meta_key_value_allowlist:
|
||||
@ -45,10 +45,10 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
|
||||
return False
|
||||
return True
|
||||
|
||||
def get_meta(self) -> Dict[str, Union[str, dict]]:
|
||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
||||
meta = super().get_meta()
|
||||
|
||||
ret = dict() # type: Dict[str, Union[str, dict]]
|
||||
ret = dict() # type: dict[str, Union[str, dict]]
|
||||
for key, value in meta.items():
|
||||
if key in self.meta_key_value_allowlist:
|
||||
if value == self.meta_key_value_allowlist[key]:
|
||||
|
@ -1,11 +1,10 @@
|
||||
from html import parser, escape
|
||||
from typing import Dict, Any, List, Tuple, Set, Optional
|
||||
from typing import Any, Optional
|
||||
import re
|
||||
import string
|
||||
|
||||
from . import abstract
|
||||
|
||||
assert Set
|
||||
|
||||
# pylint: disable=too-many-instance-attributes
|
||||
|
||||
@ -26,7 +25,7 @@ class CSSParser(abstract.AbstractParser):
|
||||
f.write(cleaned)
|
||||
return True
|
||||
|
||||
def get_meta(self) -> Dict[str, Any]:
|
||||
def get_meta(self) -> dict[str, Any]:
|
||||
metadata = {}
|
||||
with open(self.filename, encoding='utf-8') as f:
|
||||
try:
|
||||
@ -45,10 +44,10 @@ class CSSParser(abstract.AbstractParser):
|
||||
|
||||
|
||||
class AbstractHTMLParser(abstract.AbstractParser):
|
||||
tags_blocklist = set() # type: Set[str]
|
||||
tags_blocklist = set() # type: set[str]
|
||||
# In some html/xml-based formats some tags are mandatory,
|
||||
# so we're keeping them, but are discarding their content
|
||||
tags_required_blocklist = set() # type: Set[str]
|
||||
tags_required_blocklist = set() # type: set[str]
|
||||
|
||||
def __init__(self, filename):
|
||||
super().__init__(filename)
|
||||
@ -58,7 +57,7 @@ class AbstractHTMLParser(abstract.AbstractParser):
|
||||
self.__parser.feed(f.read())
|
||||
self.__parser.close()
|
||||
|
||||
def get_meta(self) -> Dict[str, Any]:
|
||||
def get_meta(self) -> dict[str, Any]:
|
||||
return self.__parser.get_meta()
|
||||
|
||||
def remove_all(self) -> bool:
|
||||
@ -92,7 +91,7 @@ class _HTMLParser(parser.HTMLParser):
|
||||
self.filename = filename
|
||||
self.__textrepr = ''
|
||||
self.__meta = {}
|
||||
self.__validation_queue = [] # type: List[str]
|
||||
self.__validation_queue = [] # type: list[str]
|
||||
|
||||
# We're using counters instead of booleans, to handle nested tags
|
||||
self.__in_dangerous_but_required_tag = 0
|
||||
@ -114,7 +113,7 @@ class _HTMLParser(parser.HTMLParser):
|
||||
"""
|
||||
raise ValueError(message)
|
||||
|
||||
def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
|
||||
def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]):
|
||||
# Ignore the type, because mypy is too stupid to infer
|
||||
# that get_starttag_text() can't return None.
|
||||
original_tag = self.get_starttag_text() # type: ignore
|
||||
@ -161,7 +160,7 @@ class _HTMLParser(parser.HTMLParser):
|
||||
self.__textrepr += escape(data)
|
||||
|
||||
def handle_startendtag(self, tag: str,
|
||||
attrs: List[Tuple[str, Optional[str]]]):
|
||||
attrs: list[tuple[str, Optional[str]]]):
|
||||
if tag in self.tag_required_blocklist | self.tag_blocklist:
|
||||
meta = {k:v for k, v in attrs}
|
||||
name = meta.get('name', 'harmful metadata')
|
||||
@ -186,7 +185,7 @@ class _HTMLParser(parser.HTMLParser):
|
||||
f.write(self.__textrepr)
|
||||
return True
|
||||
|
||||
def get_meta(self) -> Dict[str, Any]:
|
||||
def get_meta(self) -> dict[str, Any]:
|
||||
if self.__validation_queue:
|
||||
raise ValueError("Some tags (%s) were left unclosed in %s" % (
|
||||
', '.join(self.__validation_queue),
|
||||
|
Loading…
x
Reference in New Issue
Block a user