Fix the type annotations
This commit is contained in:
parent
1f73a16ef3
commit
39fb254e01
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import enum
|
import enum
|
||||||
import importlib
|
import importlib
|
||||||
from typing import Optional, Union
|
from typing import Optional, Union, Dict
|
||||||
|
|
||||||
from . import exiftool, video
|
from . import exiftool, video
|
||||||
|
|
||||||
@ -66,8 +66,9 @@ CMD_DEPENDENCIES = {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def check_dependencies() -> dict[str, dict[str, bool]]:
|
|
||||||
ret = dict() # type: dict[str, dict]
|
def check_dependencies() -> Dict[str, Dict[str, bool]]:
|
||||||
|
ret = dict() # type: Dict[str, Dict]
|
||||||
|
|
||||||
for key, value in DEPENDENCIES.items():
|
for key, value in DEPENDENCIES.items():
|
||||||
ret[key] = {
|
ret[key] = {
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import abc
|
import abc
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from typing import Union
|
from typing import Union, Set, Dict
|
||||||
|
|
||||||
|
|
||||||
class AbstractParser(abc.ABC):
|
class AbstractParser(abc.ABC):
|
||||||
@ -9,8 +9,8 @@ class AbstractParser(abc.ABC):
|
|||||||
It might yield `ValueError` on instantiation on invalid files,
|
It might yield `ValueError` on instantiation on invalid files,
|
||||||
and `RuntimeError` when something went wrong in `remove_all`.
|
and `RuntimeError` when something went wrong in `remove_all`.
|
||||||
"""
|
"""
|
||||||
meta_list = set() # type: set[str]
|
meta_list = set() # type: Set[str]
|
||||||
mimetypes = set() # type: set[str]
|
mimetypes = set() # type: Set[str]
|
||||||
|
|
||||||
def __init__(self, filename: str) -> None:
|
def __init__(self, filename: str) -> None:
|
||||||
"""
|
"""
|
||||||
@ -33,7 +33,7 @@ class AbstractParser(abc.ABC):
|
|||||||
self.sandbox = True
|
self.sandbox = True
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
def get_meta(self) -> Dict[str, Union[str, Dict]]:
|
||||||
"""Return all the metadata of the current file"""
|
"""Return all the metadata of the current file"""
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
|
@ -7,7 +7,7 @@ import tempfile
|
|||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
import shutil
|
import shutil
|
||||||
from typing import Pattern, Union, Any
|
from typing import Pattern, Union, Any, Set, Dict, List
|
||||||
|
|
||||||
from . import abstract, UnknownMemberPolicy, parser_factory
|
from . import abstract, UnknownMemberPolicy, parser_factory
|
||||||
|
|
||||||
@ -49,11 +49,11 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
|
|
||||||
# Those are the files that have a format that _isn't_
|
# Those are the files that have a format that _isn't_
|
||||||
# supported by mat2, but that we want to keep anyway.
|
# supported by mat2, but that we want to keep anyway.
|
||||||
self.files_to_keep = set() # type: set[Pattern]
|
self.files_to_keep = set() # type: Set[Pattern]
|
||||||
|
|
||||||
# Those are the files that we _do not_ want to keep,
|
# Those are the files that we _do not_ want to keep,
|
||||||
# no matter if they are supported or not.
|
# no matter if they are supported or not.
|
||||||
self.files_to_omit = set() # type: set[Pattern]
|
self.files_to_omit = set() # type: Set[Pattern]
|
||||||
|
|
||||||
# what should the parser do if it encounters an unknown file in
|
# what should the parser do if it encounters an unknown file in
|
||||||
# the archive?
|
# the archive?
|
||||||
@ -72,7 +72,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
# pylint: disable=unused-argument
|
# pylint: disable=unused-argument
|
||||||
return True # pragma: no cover
|
return True # pragma: no cover
|
||||||
|
|
||||||
def _specific_get_meta(self, full_path: str, file_path: str) -> dict[str, Any]:
|
def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
|
||||||
""" This method can be used to extract specific metadata
|
""" This method can be used to extract specific metadata
|
||||||
from files present in the archive."""
|
from files present in the archive."""
|
||||||
# pylint: disable=unused-argument
|
# pylint: disable=unused-argument
|
||||||
@ -87,7 +87,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
|
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
|
||||||
"""Return all the members of the archive."""
|
"""Return all the members of the archive."""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -97,7 +97,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
|
def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
|
||||||
"""Return all the metadata of a given member."""
|
"""Return all the metadata of a given member."""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -128,8 +128,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
# pylint: disable=unused-argument
|
# pylint: disable=unused-argument
|
||||||
return member
|
return member
|
||||||
|
|
||||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
def get_meta(self) -> dict[str, Union[str, Dict]]:
|
||||||
meta = dict() # type: dict[str, Union[str, dict]]
|
meta = dict() # type: Dict[str, Union[str, Dict]]
|
||||||
|
|
||||||
with self.archive_class(self.filename) as zin:
|
with self.archive_class(self.filename) as zin:
|
||||||
temp_folder = tempfile.mkdtemp()
|
temp_folder = tempfile.mkdtemp()
|
||||||
@ -264,6 +264,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
|
|
||||||
class TarParser(ArchiveBasedAbstractParser):
|
class TarParser(ArchiveBasedAbstractParser):
|
||||||
mimetypes = {'application/x-tar'}
|
mimetypes = {'application/x-tar'}
|
||||||
|
|
||||||
def __init__(self, filename):
|
def __init__(self, filename):
|
||||||
super().__init__(filename)
|
super().__init__(filename)
|
||||||
# yes, it's tarfile.open and not tarfile.TarFile,
|
# yes, it's tarfile.open and not tarfile.TarFile,
|
||||||
@ -336,7 +337,7 @@ class TarParser(ArchiveBasedAbstractParser):
|
|||||||
return member
|
return member
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
|
def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
|
||||||
assert isinstance(member, tarfile.TarInfo) # please mypy
|
assert isinstance(member, tarfile.TarInfo) # please mypy
|
||||||
metadata = {}
|
metadata = {}
|
||||||
if member.mtime != 0:
|
if member.mtime != 0:
|
||||||
@ -358,7 +359,7 @@ class TarParser(ArchiveBasedAbstractParser):
|
|||||||
archive.add(full_path, member.name, filter=TarParser._clean_member) # type: ignore
|
archive.add(full_path, member.name, filter=TarParser._clean_member) # type: ignore
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
|
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
|
||||||
assert isinstance(archive, tarfile.TarFile) # please mypy
|
assert isinstance(archive, tarfile.TarFile) # please mypy
|
||||||
return archive.getmembers() # type: ignore
|
return archive.getmembers() # type: ignore
|
||||||
|
|
||||||
@ -391,7 +392,8 @@ class TarXzParser(TarParser):
|
|||||||
|
|
||||||
class ZipParser(ArchiveBasedAbstractParser):
|
class ZipParser(ArchiveBasedAbstractParser):
|
||||||
mimetypes = {'application/zip'}
|
mimetypes = {'application/zip'}
|
||||||
def __init__(self, filename):
|
|
||||||
|
def __init__(self, filename: str):
|
||||||
super().__init__(filename)
|
super().__init__(filename)
|
||||||
self.archive_class = zipfile.ZipFile
|
self.archive_class = zipfile.ZipFile
|
||||||
self.member_class = zipfile.ZipInfo
|
self.member_class = zipfile.ZipInfo
|
||||||
@ -412,7 +414,7 @@ class ZipParser(ArchiveBasedAbstractParser):
|
|||||||
return member
|
return member
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_member_meta(member: ArchiveMember) -> dict[str, str]:
|
def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
|
||||||
assert isinstance(member, zipfile.ZipInfo) # please mypy
|
assert isinstance(member, zipfile.ZipInfo) # please mypy
|
||||||
metadata = {}
|
metadata = {}
|
||||||
if member.create_system == 3: # this is Linux
|
if member.create_system == 3: # this is Linux
|
||||||
@ -439,7 +441,7 @@ class ZipParser(ArchiveBasedAbstractParser):
|
|||||||
compress_type=member.compress_type)
|
compress_type=member.compress_type)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_all_members(archive: ArchiveClass) -> list[ArchiveMember]:
|
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
|
||||||
assert isinstance(archive, zipfile.ZipFile) # please mypy
|
assert isinstance(archive, zipfile.ZipFile) # please mypy
|
||||||
return archive.infolist() # type: ignore
|
return archive.infolist() # type: ignore
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ import mimetypes
|
|||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
from typing import Union
|
from typing import Union, Dict
|
||||||
|
|
||||||
import mutagen
|
import mutagen
|
||||||
|
|
||||||
@ -18,7 +18,7 @@ class MutagenParser(abstract.AbstractParser):
|
|||||||
except mutagen.MutagenError:
|
except mutagen.MutagenError:
|
||||||
raise ValueError
|
raise ValueError
|
||||||
|
|
||||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
def get_meta(self) -> Dict[str, Union[str, Dict]]:
|
||||||
f = mutagen.File(self.filename)
|
f = mutagen.File(self.filename)
|
||||||
if f.tags:
|
if f.tags:
|
||||||
return {k: ', '.join(map(str, v)) for k, v in f.tags.items()}
|
return {k: ', '.join(map(str, v)) for k, v in f.tags.items()}
|
||||||
@ -38,8 +38,8 @@ class MutagenParser(abstract.AbstractParser):
|
|||||||
class MP3Parser(MutagenParser):
|
class MP3Parser(MutagenParser):
|
||||||
mimetypes = {'audio/mpeg', }
|
mimetypes = {'audio/mpeg', }
|
||||||
|
|
||||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
def get_meta(self) -> Dict[str, Union[str, Dict]]:
|
||||||
metadata = {} # type: dict[str, Union[str, dict]]
|
metadata = {} # type: Dict[str, Union[str, Dict]]
|
||||||
meta = mutagen.File(self.filename).tags
|
meta = mutagen.File(self.filename).tags
|
||||||
if not meta:
|
if not meta:
|
||||||
return metadata
|
return metadata
|
||||||
@ -68,7 +68,7 @@ class FLACParser(MutagenParser):
|
|||||||
f.save(deleteid3=True)
|
f.save(deleteid3=True)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
def get_meta(self) -> Dict[str, Union[str, Dict]]:
|
||||||
meta = super().get_meta()
|
meta = super().get_meta()
|
||||||
for num, picture in enumerate(mutagen.File(self.filename).pictures):
|
for num, picture in enumerate(mutagen.File(self.filename).pictures):
|
||||||
name = picture.desc if picture.desc else 'Cover %d' % num
|
name = picture.desc if picture.desc else 'Cover %d' % num
|
||||||
@ -98,6 +98,7 @@ class WAVParser(video.AbstractFFmpegParser):
|
|||||||
'MIMEType', 'NumChannels', 'SampleRate', 'SourceFile',
|
'MIMEType', 'NumChannels', 'SampleRate', 'SourceFile',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class AIFFParser(video.AbstractFFmpegParser):
|
class AIFFParser(video.AbstractFFmpegParser):
|
||||||
mimetypes = {'audio/aiff', 'audio/x-aiff'}
|
mimetypes = {'audio/aiff', 'audio/x-aiff'}
|
||||||
meta_allowlist = {'AvgBytesPerSec', 'BitsPerSample', 'Directory',
|
meta_allowlist = {'AvgBytesPerSec', 'BitsPerSample', 'Directory',
|
||||||
|
@ -12,7 +12,7 @@ import shutil
|
|||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
import functools
|
import functools
|
||||||
from typing import Optional
|
from typing import Optional, List
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['PIPE', 'run', 'CalledProcessError']
|
__all__ = ['PIPE', 'run', 'CalledProcessError']
|
||||||
@ -33,7 +33,7 @@ def _get_bwrap_path() -> str:
|
|||||||
|
|
||||||
def _get_bwrap_args(tempdir: str,
|
def _get_bwrap_args(tempdir: str,
|
||||||
input_filename: str,
|
input_filename: str,
|
||||||
output_filename: Optional[str] = None) -> list[str]:
|
output_filename: Optional[str] = None) -> List[str]:
|
||||||
ro_bind_args = []
|
ro_bind_args = []
|
||||||
cwd = os.getcwd()
|
cwd = os.getcwd()
|
||||||
|
|
||||||
|
@ -3,10 +3,11 @@ import re
|
|||||||
import uuid
|
import uuid
|
||||||
import zipfile
|
import zipfile
|
||||||
import xml.etree.ElementTree as ET # type: ignore
|
import xml.etree.ElementTree as ET # type: ignore
|
||||||
from typing import Any
|
from typing import Any, Dict
|
||||||
|
|
||||||
from . import archive, office
|
from . import archive, office
|
||||||
|
|
||||||
|
|
||||||
class EPUBParser(archive.ZipParser):
|
class EPUBParser(archive.ZipParser):
|
||||||
mimetypes = {'application/epub+zip', }
|
mimetypes = {'application/epub+zip', }
|
||||||
metadata_namespace = '{http://purl.org/dc/elements/1.1/}'
|
metadata_namespace = '{http://purl.org/dc/elements/1.1/}'
|
||||||
@ -28,7 +29,6 @@ class EPUBParser(archive.ZipParser):
|
|||||||
}))
|
}))
|
||||||
self.uniqid = uuid.uuid4()
|
self.uniqid = uuid.uuid4()
|
||||||
|
|
||||||
|
|
||||||
def is_archive_valid(self):
|
def is_archive_valid(self):
|
||||||
super().is_archive_valid()
|
super().is_archive_valid()
|
||||||
with zipfile.ZipFile(self.filename) as zin:
|
with zipfile.ZipFile(self.filename) as zin:
|
||||||
@ -37,7 +37,7 @@ class EPUBParser(archive.ZipParser):
|
|||||||
if member_name.endswith('META-INF/encryption.xml'):
|
if member_name.endswith('META-INF/encryption.xml'):
|
||||||
raise ValueError('the file contains encrypted fonts')
|
raise ValueError('the file contains encrypted fonts')
|
||||||
|
|
||||||
def _specific_get_meta(self, full_path, file_path) -> dict[str, Any]:
|
def _specific_get_meta(self, full_path, file_path) -> Dict[str, Any]:
|
||||||
if not file_path.endswith('.opf'):
|
if not file_path.endswith('.opf'):
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
@ -73,7 +73,6 @@ class EPUBParser(archive.ZipParser):
|
|||||||
short_empty_elements=False)
|
short_empty_elements=False)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def __handle_tocncx(self, full_path: str) -> bool:
|
def __handle_tocncx(self, full_path: str) -> bool:
|
||||||
try:
|
try:
|
||||||
tree, namespace = office._parse_xml(full_path)
|
tree, namespace = office._parse_xml(full_path)
|
||||||
|
@ -4,7 +4,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
from typing import Union
|
from typing import Union, Set, Dict
|
||||||
|
|
||||||
from . import abstract
|
from . import abstract
|
||||||
from . import bubblewrap
|
from . import bubblewrap
|
||||||
@ -15,9 +15,9 @@ class ExiftoolParser(abstract.AbstractParser):
|
|||||||
from a file, hence why several parsers are re-using its `get_meta`
|
from a file, hence why several parsers are re-using its `get_meta`
|
||||||
method.
|
method.
|
||||||
"""
|
"""
|
||||||
meta_allowlist = set() # type: set[str]
|
meta_allowlist = set() # type: Set[str]
|
||||||
|
|
||||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
def get_meta(self) -> Dict[str, Union[str, Dict]]:
|
||||||
try:
|
try:
|
||||||
if self.sandbox:
|
if self.sandbox:
|
||||||
out = bubblewrap.run([_get_exiftool_path(), '-json',
|
out = bubblewrap.run([_get_exiftool_path(), '-json',
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
import shutil
|
import shutil
|
||||||
from typing import Union
|
from typing import Union, Dict
|
||||||
from . import abstract
|
from . import abstract
|
||||||
|
|
||||||
|
|
||||||
@ -7,7 +7,7 @@ class HarmlessParser(abstract.AbstractParser):
|
|||||||
""" This is the parser for filetypes that can not contain metadata. """
|
""" This is the parser for filetypes that can not contain metadata. """
|
||||||
mimetypes = {'text/plain', 'image/x-ms-bmp'}
|
mimetypes = {'text/plain', 'image/x-ms-bmp'}
|
||||||
|
|
||||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
def get_meta(self) -> Dict[str, Union[str, Dict]]:
|
||||||
return dict()
|
return dict()
|
||||||
|
|
||||||
def remove_all(self) -> bool:
|
def remove_all(self) -> bool:
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from typing import Union, Any
|
from typing import Union, Any, Dict
|
||||||
|
|
||||||
import cairo
|
import cairo
|
||||||
|
|
||||||
@ -48,7 +48,7 @@ class SVGParser(exiftool.ExiftoolParser):
|
|||||||
surface.finish()
|
surface.finish()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
def get_meta(self) -> Dict[str, Union[str, Dict]]:
|
||||||
meta = super().get_meta()
|
meta = super().get_meta()
|
||||||
|
|
||||||
# The namespace is mandatory, but only the …/2000/svg is valid.
|
# The namespace is mandatory, but only the …/2000/svg is valid.
|
||||||
@ -57,6 +57,7 @@ class SVGParser(exiftool.ExiftoolParser):
|
|||||||
meta.pop('Xmlns')
|
meta.pop('Xmlns')
|
||||||
return meta
|
return meta
|
||||||
|
|
||||||
|
|
||||||
class PNGParser(exiftool.ExiftoolParser):
|
class PNGParser(exiftool.ExiftoolParser):
|
||||||
mimetypes = {'image/png', }
|
mimetypes = {'image/png', }
|
||||||
meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
|
meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
|
||||||
@ -156,11 +157,12 @@ class TiffParser(GdkPixbufAbstractParser):
|
|||||||
'FileTypeExtension', 'ImageHeight', 'ImageSize',
|
'FileTypeExtension', 'ImageHeight', 'ImageSize',
|
||||||
'ImageWidth', 'MIMEType', 'Megapixels', 'SourceFile'}
|
'ImageWidth', 'MIMEType', 'Megapixels', 'SourceFile'}
|
||||||
|
|
||||||
|
|
||||||
class PPMParser(abstract.AbstractParser):
|
class PPMParser(abstract.AbstractParser):
|
||||||
mimetypes = {'image/x-portable-pixmap'}
|
mimetypes = {'image/x-portable-pixmap'}
|
||||||
|
|
||||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
def get_meta(self) -> Dict[str, Union[str, Dict]]:
|
||||||
meta = {} # type: dict[str, Union[str, dict[Any, Any]]]
|
meta = {} # type: Dict[str, Union[str, Dict[Any, Any]]]
|
||||||
with open(self.filename) as f:
|
with open(self.filename) as f:
|
||||||
for idx, line in enumerate(f):
|
for idx, line in enumerate(f):
|
||||||
if line.lstrip().startswith('#'):
|
if line.lstrip().startswith('#'):
|
||||||
@ -176,6 +178,7 @@ class PPMParser(abstract.AbstractParser):
|
|||||||
fout.write(line)
|
fout.write(line)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
class HEICParser(exiftool.ExiftoolParser):
|
class HEICParser(exiftool.ExiftoolParser):
|
||||||
mimetypes = {'image/heic'}
|
mimetypes = {'image/heic'}
|
||||||
meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
|
meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
|
||||||
|
@ -4,7 +4,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import zipfile
|
import zipfile
|
||||||
from typing import Pattern, Any
|
from typing import Pattern, Any, Tuple, Dict
|
||||||
|
|
||||||
import xml.etree.ElementTree as ET # type: ignore
|
import xml.etree.ElementTree as ET # type: ignore
|
||||||
|
|
||||||
@ -12,7 +12,8 @@ from .archive import ZipParser
|
|||||||
|
|
||||||
# pylint: disable=line-too-long
|
# pylint: disable=line-too-long
|
||||||
|
|
||||||
def _parse_xml(full_path: str) -> tuple[ET.ElementTree, dict[str, str]]:
|
|
||||||
|
def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]:
|
||||||
""" This function parses XML, with namespace support. """
|
""" This function parses XML, with namespace support. """
|
||||||
namespace_map = dict()
|
namespace_map = dict()
|
||||||
for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
|
for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
|
||||||
@ -68,7 +69,6 @@ class MSOfficeParser(ZipParser):
|
|||||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml',
|
'application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, filename):
|
def __init__(self, filename):
|
||||||
super().__init__(filename)
|
super().__init__(filename)
|
||||||
|
|
||||||
@ -229,7 +229,6 @@ class MSOfficeParser(ZipParser):
|
|||||||
tree.write(full_path, xml_declaration=True)
|
tree.write(full_path, xml_declaration=True)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def __remove_revisions(full_path: str) -> bool:
|
def __remove_revisions(full_path: str) -> bool:
|
||||||
try:
|
try:
|
||||||
@ -319,7 +318,6 @@ class MSOfficeParser(ZipParser):
|
|||||||
for i in re.findall(r'<p:cNvPr id="([0-9]+)"', content):
|
for i in re.findall(r'<p:cNvPr id="([0-9]+)"', content):
|
||||||
self.__counters['cNvPr'].add(int(i))
|
self.__counters['cNvPr'].add(int(i))
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def __randomize_creationId(full_path: str) -> bool:
|
def __randomize_creationId(full_path: str) -> bool:
|
||||||
try:
|
try:
|
||||||
@ -459,7 +457,6 @@ class LibreOfficeParser(ZipParser):
|
|||||||
'application/vnd.oasis.opendocument.image',
|
'application/vnd.oasis.opendocument.image',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, filename):
|
def __init__(self, filename):
|
||||||
super().__init__(filename)
|
super().__init__(filename)
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@ import re
|
|||||||
import logging
|
import logging
|
||||||
import tempfile
|
import tempfile
|
||||||
import io
|
import io
|
||||||
from typing import Union
|
from typing import Union, Dict
|
||||||
|
|
||||||
import cairo
|
import cairo
|
||||||
import gi
|
import gi
|
||||||
@ -18,6 +18,7 @@ from . import abstract
|
|||||||
|
|
||||||
FIXED_PDF_VERSION = cairo.PDFVersion.VERSION_1_5
|
FIXED_PDF_VERSION = cairo.PDFVersion.VERSION_1_5
|
||||||
|
|
||||||
|
|
||||||
class PDFParser(abstract.AbstractParser):
|
class PDFParser(abstract.AbstractParser):
|
||||||
mimetypes = {'application/pdf', }
|
mimetypes = {'application/pdf', }
|
||||||
meta_list = {'author', 'creation-date', 'creator', 'format', 'keywords',
|
meta_list = {'author', 'creation-date', 'creator', 'format', 'keywords',
|
||||||
@ -140,13 +141,13 @@ class PDFParser(abstract.AbstractParser):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def __parse_metadata_field(data: str) -> dict[str, str]:
|
def __parse_metadata_field(data: str) -> Dict[str, str]:
|
||||||
metadata = {}
|
metadata = {}
|
||||||
for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I):
|
for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I):
|
||||||
metadata[key] = value
|
metadata[key] = value
|
||||||
return metadata
|
return metadata
|
||||||
|
|
||||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
def get_meta(self) -> Dict[str, Union[str, Dict]]:
|
||||||
""" Return a dict with all the meta of the file
|
""" Return a dict with all the meta of the file
|
||||||
"""
|
"""
|
||||||
metadata = {}
|
metadata = {}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Union
|
from typing import Union, Dict, List, Tuple
|
||||||
|
|
||||||
from . import abstract
|
from . import abstract
|
||||||
|
|
||||||
@ -15,7 +15,7 @@ class TorrentParser(abstract.AbstractParser):
|
|||||||
if self.dict_repr is None:
|
if self.dict_repr is None:
|
||||||
raise ValueError
|
raise ValueError
|
||||||
|
|
||||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
def get_meta(self) -> Dict[str, Union[str, Dict]]:
|
||||||
metadata = {}
|
metadata = {}
|
||||||
for key, value in self.dict_repr.items():
|
for key, value in self.dict_repr.items():
|
||||||
if key not in self.allowlist:
|
if key not in self.allowlist:
|
||||||
@ -56,7 +56,7 @@ class _BencodeHandler:
|
|||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def __decode_int(s: bytes) -> tuple[int, bytes]:
|
def __decode_int(s: bytes) -> Tuple[int, bytes]:
|
||||||
s = s[1:]
|
s = s[1:]
|
||||||
next_idx = s.index(b'e')
|
next_idx = s.index(b'e')
|
||||||
if s.startswith(b'-0'):
|
if s.startswith(b'-0'):
|
||||||
@ -66,7 +66,7 @@ class _BencodeHandler:
|
|||||||
return int(s[:next_idx]), s[next_idx+1:]
|
return int(s[:next_idx]), s[next_idx+1:]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def __decode_string(s: bytes) -> tuple[bytes, bytes]:
|
def __decode_string(s: bytes) -> Tuple[bytes, bytes]:
|
||||||
colon = s.index(b':')
|
colon = s.index(b':')
|
||||||
# FIXME Python3 is broken here, the call to `ord` shouldn't be needed,
|
# FIXME Python3 is broken here, the call to `ord` shouldn't be needed,
|
||||||
# but apparently it is. This is utterly idiotic.
|
# but apparently it is. This is utterly idiotic.
|
||||||
@ -76,7 +76,7 @@ class _BencodeHandler:
|
|||||||
s = s[1:]
|
s = s[1:]
|
||||||
return s[colon:colon+str_len], s[colon+str_len:]
|
return s[colon:colon+str_len], s[colon+str_len:]
|
||||||
|
|
||||||
def __decode_list(self, s: bytes) -> tuple[list, bytes]:
|
def __decode_list(self, s: bytes) -> Tuple[List, bytes]:
|
||||||
ret = list()
|
ret = list()
|
||||||
s = s[1:] # skip leading `l`
|
s = s[1:] # skip leading `l`
|
||||||
while s[0] != ord('e'):
|
while s[0] != ord('e'):
|
||||||
@ -84,7 +84,7 @@ class _BencodeHandler:
|
|||||||
ret.append(value)
|
ret.append(value)
|
||||||
return ret, s[1:]
|
return ret, s[1:]
|
||||||
|
|
||||||
def __decode_dict(self, s: bytes) -> tuple[dict, bytes]:
|
def __decode_dict(self, s: bytes) -> Tuple[Dict, bytes]:
|
||||||
ret = dict()
|
ret = dict()
|
||||||
s = s[1:] # skip leading `d`
|
s = s[1:] # skip leading `d`
|
||||||
while s[0] != ord(b'e'):
|
while s[0] != ord(b'e'):
|
||||||
@ -113,10 +113,10 @@ class _BencodeHandler:
|
|||||||
ret += self.__encode_func[type(value)](value)
|
ret += self.__encode_func[type(value)](value)
|
||||||
return b'd' + ret + b'e'
|
return b'd' + ret + b'e'
|
||||||
|
|
||||||
def bencode(self, s: Union[dict, list, bytes, int]) -> bytes:
|
def bencode(self, s: Union[Dict, List, bytes, int]) -> bytes:
|
||||||
return self.__encode_func[type(s)](s)
|
return self.__encode_func[type(s)](s)
|
||||||
|
|
||||||
def bdecode(self, s: bytes) -> Union[dict, None]:
|
def bdecode(self, s: bytes) -> Union[Dict, None]:
|
||||||
try:
|
try:
|
||||||
ret, trail = self.__decode_func[s[0]](s)
|
ret, trail = self.__decode_func[s[0]](s)
|
||||||
except (IndexError, KeyError, ValueError) as e:
|
except (IndexError, KeyError, ValueError) as e:
|
||||||
|
@ -3,7 +3,7 @@ import functools
|
|||||||
import shutil
|
import shutil
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from typing import Union
|
from typing import Union, Dict
|
||||||
|
|
||||||
from . import exiftool
|
from . import exiftool
|
||||||
from . import bubblewrap
|
from . import bubblewrap
|
||||||
@ -12,7 +12,7 @@ from . import bubblewrap
|
|||||||
class AbstractFFmpegParser(exiftool.ExiftoolParser):
|
class AbstractFFmpegParser(exiftool.ExiftoolParser):
|
||||||
""" Abstract parser for all FFmpeg-based ones, mainly for video. """
|
""" Abstract parser for all FFmpeg-based ones, mainly for video. """
|
||||||
# Some fileformats have mandatory metadata fields
|
# Some fileformats have mandatory metadata fields
|
||||||
meta_key_value_allowlist = {} # type: dict[str, Union[str, int]]
|
meta_key_value_allowlist = {} # type: Dict[str, Union[str, int]]
|
||||||
|
|
||||||
def remove_all(self) -> bool:
|
def remove_all(self) -> bool:
|
||||||
if self.meta_key_value_allowlist:
|
if self.meta_key_value_allowlist:
|
||||||
@ -45,10 +45,10 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
|
|||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def get_meta(self) -> dict[str, Union[str, dict]]:
|
def get_meta(self) -> Dict[str, Union[str, Dict]]:
|
||||||
meta = super().get_meta()
|
meta = super().get_meta()
|
||||||
|
|
||||||
ret = dict() # type: dict[str, Union[str, dict]]
|
ret = dict() # type: Dict[str, Union[str, Dict]]
|
||||||
for key, value in meta.items():
|
for key, value in meta.items():
|
||||||
if key in self.meta_key_value_allowlist:
|
if key in self.meta_key_value_allowlist:
|
||||||
if value == self.meta_key_value_allowlist[key]:
|
if value == self.meta_key_value_allowlist[key]:
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from html import parser, escape
|
from html import parser, escape
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional, Dict, List, Tuple, Set
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
|
|
||||||
@ -25,7 +25,7 @@ class CSSParser(abstract.AbstractParser):
|
|||||||
f.write(cleaned)
|
f.write(cleaned)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def get_meta(self) -> dict[str, Any]:
|
def get_meta(self) -> Dict[str, Any]:
|
||||||
metadata = {}
|
metadata = {}
|
||||||
with open(self.filename, encoding='utf-8') as f:
|
with open(self.filename, encoding='utf-8') as f:
|
||||||
try:
|
try:
|
||||||
@ -44,10 +44,10 @@ class CSSParser(abstract.AbstractParser):
|
|||||||
|
|
||||||
|
|
||||||
class AbstractHTMLParser(abstract.AbstractParser):
|
class AbstractHTMLParser(abstract.AbstractParser):
|
||||||
tags_blocklist = set() # type: set[str]
|
tags_blocklist = set() # type: Set[str]
|
||||||
# In some html/xml-based formats some tags are mandatory,
|
# In some html/xml-based formats some tags are mandatory,
|
||||||
# so we're keeping them, but are discarding their content
|
# so we're keeping them, but are discarding their content
|
||||||
tags_required_blocklist = set() # type: set[str]
|
tags_required_blocklist = set() # type: Set[str]
|
||||||
|
|
||||||
def __init__(self, filename):
|
def __init__(self, filename):
|
||||||
super().__init__(filename)
|
super().__init__(filename)
|
||||||
@ -57,7 +57,7 @@ class AbstractHTMLParser(abstract.AbstractParser):
|
|||||||
self.__parser.feed(f.read())
|
self.__parser.feed(f.read())
|
||||||
self.__parser.close()
|
self.__parser.close()
|
||||||
|
|
||||||
def get_meta(self) -> dict[str, Any]:
|
def get_meta(self) -> Dict[str, Any]:
|
||||||
return self.__parser.get_meta()
|
return self.__parser.get_meta()
|
||||||
|
|
||||||
def remove_all(self) -> bool:
|
def remove_all(self) -> bool:
|
||||||
@ -112,7 +112,7 @@ class _HTMLParser(parser.HTMLParser):
|
|||||||
"""
|
"""
|
||||||
raise ValueError(message)
|
raise ValueError(message)
|
||||||
|
|
||||||
def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]):
|
def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
|
||||||
# Ignore the type, because mypy is too stupid to infer
|
# Ignore the type, because mypy is too stupid to infer
|
||||||
# that get_starttag_text() can't return None.
|
# that get_starttag_text() can't return None.
|
||||||
original_tag = self.get_starttag_text() # type: ignore
|
original_tag = self.get_starttag_text() # type: ignore
|
||||||
@ -159,7 +159,7 @@ class _HTMLParser(parser.HTMLParser):
|
|||||||
self.__textrepr += escape(data)
|
self.__textrepr += escape(data)
|
||||||
|
|
||||||
def handle_startendtag(self, tag: str,
|
def handle_startendtag(self, tag: str,
|
||||||
attrs: list[tuple[str, Optional[str]]]):
|
attrs: List[Tuple[str, Optional[str]]]):
|
||||||
if tag in self.tag_required_blocklist | self.tag_blocklist:
|
if tag in self.tag_required_blocklist | self.tag_blocklist:
|
||||||
meta = {k:v for k, v in attrs}
|
meta = {k:v for k, v in attrs}
|
||||||
name = meta.get('name', 'harmful metadata')
|
name = meta.get('name', 'harmful metadata')
|
||||||
@ -184,7 +184,7 @@ class _HTMLParser(parser.HTMLParser):
|
|||||||
f.write(self.__textrepr)
|
f.write(self.__textrepr)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def get_meta(self) -> dict[str, Any]:
|
def get_meta(self) -> Dict[str, Any]:
|
||||||
if self.__validation_queue:
|
if self.__validation_queue:
|
||||||
raise ValueError("Some tags (%s) were left unclosed in %s" % (
|
raise ValueError("Some tags (%s) were left unclosed in %s" % (
|
||||||
', '.join(self.__validation_queue),
|
', '.join(self.__validation_queue),
|
||||||
|
Loading…
Reference in New Issue
Block a user