2018-06-04 00:14:24 +02:00
|
|
|
import glob
|
2018-05-15 23:29:04 +02:00
|
|
|
import os
|
2018-03-19 23:43:49 +01:00
|
|
|
import mimetypes
|
2018-03-20 01:20:11 +01:00
|
|
|
import importlib
|
2023-01-28 17:22:26 +01:00
|
|
|
from typing import TypeVar, Optional, List
|
2018-03-19 23:43:49 +01:00
|
|
|
|
2018-07-08 22:40:36 +02:00
|
|
|
from . import abstract, UNSUPPORTED_EXTENSIONS
|
2018-03-20 01:20:11 +01:00
|
|
|
|
2018-04-02 19:11:59 +02:00
|
|
|
# Type variable bounded by `abstract.AbstractParser`; it appears in the return
# annotations of `_get_parsers` and `get_parser`.
T = TypeVar('T', bound='abstract.AbstractParser')
|
|
|
|
|
2019-02-21 01:28:11 +01:00
|
|
|
# Register additional (mimetype, extension) pairs with the `mimetypes` module,
# in this exact order.
for _extra_type, _extra_ext in (
        ('application/epub+zip', '.epub'),
        ('application/x-dtbncx+xml', '.ncx'),  # EPUB Navigation Control XML File
        # This should be removed after we move to python3.10
        # https://github.com/python/cpython/commit/20a5b7e986377bdfd929d7e8c4e3db5847dfdb2d
        ('image/heic', '.heic'),
):
    mimetypes.add_type(_extra_type, _extra_ext)
# Do not leave the loop variables behind in the module namespace.
del _extra_type, _extra_ext
|
|
|
|
|
2019-02-03 10:43:27 +01:00
|
|
|
|
2018-06-04 00:14:24 +02:00
|
|
|
def __load_all_parsers():
    """ Dynamically import the `*.py` modules located next to this file.

    Every file matching `*.py` in this file's directory is imported as a
    submodule of the `libmat2` package, except those whose path ends with
    `abstract.py`, `__init__.py` or `exiftool.py`.
    """
    # `str.endswith` accepts a tuple: one call covers all skipped suffixes.
    skipped_suffixes = ('abstract.py', '__init__.py', 'exiftool.py')
    package_dir = os.path.dirname(__file__)

    for module_path in glob.glob(os.path.join(package_dir, '*.py')):
        if module_path.endswith(skipped_suffixes):
            continue
        # Strip the directory and the `.py` extension to get the module name.
        module_name = os.path.splitext(os.path.basename(module_path))[0]
        importlib.import_module('.' + module_name, package='libmat2')
|
|
|
|
|
2019-02-03 10:43:27 +01:00
|
|
|
|
2018-06-04 00:14:24 +02:00
|
|
|
# Run the dynamic imports as soon as this module is loaded: `_get_parsers`
# relies on `__subclasses__()`, which only reports classes already defined.
__load_all_parsers()
|
2018-04-04 23:21:48 +02:00
|
|
|
|
2019-02-03 10:43:27 +01:00
|
|
|
|
2023-01-28 17:22:26 +01:00
|
|
|
def _get_parsers() -> List[T]:
    """ Collect every direct and indirect subclass of `abstract.AbstractParser`.

    :returns: A list with, for each class visited, its direct subclasses
        first, followed by the descendants of each of those subclasses
        (built recursively in the same manner).
    """
    def _descendants(klass):
        children = klass.__subclasses__()
        found = list(children)
        # Deeper levels are appended after all the direct children.
        for child in children:
            found.extend(_descendants(child))
        return found

    return _descendants(abstract.AbstractParser)
|
|
|
|
|
2018-04-04 23:21:48 +02:00
|
|
|
|
2022-08-28 22:29:06 +02:00
|
|
|
def get_parser(filename: str) -> tuple[Optional[T], Optional[str]]:
    """ Find and instantiate the parser suited to a given filename.

    The mimetype is guessed from the file name with `mimetypes.guess_type`.
    When it is `application/x-tar` and the last extension of the file is
    `bz2`, `gz` or `xz`, that extension is appended to the mimetype
    (e.g. `application/x-tar+gz`) before looking for a parser.

    :param filename: Name (or path) of the file to get a parser for.
    :returns: A `(parser, mimetype)` tuple. The parser is `None` when the
        extension is listed in `UNSUPPORTED_EXTENSIONS`, or when no parser
        class lists the mimetype in its `mimetypes` attribute.
    :raises ValueError: Raised if the instantiation of the parser went wrong.
    """
    mtype, _ = mimetypes.guess_type(filename)
    suffix = os.path.splitext(filename)[1]

    # Unsupported extensions are rejected without looking for a parser.
    if suffix.lower() in UNSUPPORTED_EXTENSIONS:
        return None, mtype

    compression = suffix[1:]
    if mtype == 'application/x-tar' and compression in ('bz2', 'gz', 'xz'):
        mtype = f'{mtype}+{compression}'

    # The first parser class (in `_get_parsers` order) declaring the mimetype wins.
    matching = next(
        (klass for klass in _get_parsers() if mtype in klass.mimetypes),  # type: ignore
        None,
    )
    if matching is None:
        return None, mtype

    # This instantiation might raise a ValueError on malformed files
    return matching(filename), mtype
|