2019-05-16 00:31:40 +02:00
|
|
|
import functools
|
2018-10-18 19:19:56 +02:00
|
|
|
import json
|
2018-10-23 16:14:21 +02:00
|
|
|
import logging
|
2018-10-18 19:19:56 +02:00
|
|
|
import os
|
2020-02-10 03:31:07 +01:00
|
|
|
import shutil
|
2019-10-13 01:13:49 +02:00
|
|
|
import subprocess
|
2023-01-28 16:57:20 +01:00
|
|
|
from typing import Union, Set, Dict
|
2018-10-18 19:19:56 +02:00
|
|
|
|
|
|
|
from . import abstract
|
2019-10-13 01:13:49 +02:00
|
|
|
from . import bubblewrap
|
2018-10-18 19:19:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
class ExiftoolParser(abstract.AbstractParser):
    """ Exiftool is often the easiest way to get all the metadata
    from a file, hence why several parsers are re-using its `get_meta`
    method.
    """
    # Top-level keys that `get_meta` removes from exiftool's JSON output
    # before returning it.
    meta_allowlist = set()  # type: Set[str]

    def get_meta(self) -> Dict[str, Union[str, Dict]]:
        """ Return the metadata that exiftool reports for `self.filename`.

        Runs `exiftool -json <filename>` (through `bubblewrap.run` when
        `self.sandbox` is truthy, directly via `subprocess.run` otherwise),
        parses the JSON output, keeps its first element and drops every key
        listed in `meta_allowlist`.

        :raises ValueError: if exiftool exits with a non-zero status.
        """
        # The command is identical in both modes; only the runner differs.
        cmd = [_get_exiftool_path(), '-json', self.filename]
        try:
            if self.sandbox:
                out = bubblewrap.run(cmd,
                                     input_filename=self.filename,
                                     check=True, stdout=subprocess.PIPE).stdout
            else:
                out = subprocess.run(cmd,
                                     check=True, stdout=subprocess.PIPE).stdout
        except subprocess.CalledProcessError as e:  # pragma: no cover
            # Keep the exception type callers rely on, but say what failed
            # and preserve the underlying error as the explicit cause.
            raise ValueError("exiftool was unable to process %s" %
                             self.filename) from e
        # exiftool emits a JSON list; the first element is used here.
        meta = json.loads(out.decode('utf-8'))[0]
        for key in self.meta_allowlist:
            meta.pop(key, None)
        return meta

    def _lightweight_cleanup(self) -> bool:
        """ Ask exiftool to write a metadata-stripped copy of `self.filename`
        to `self.output_filename`.

        :return: True on success; False if a pre-existing output file could
                 not be removed or if exiftool exited with an error. Both
                 failures are reported through `logging.error`.
        """
        if os.path.exists(self.output_filename):
            try:  # exiftool can't force output to existing files
                os.remove(self.output_filename)
            except OSError as e:  # pragma: no cover
                # Report the file that could not be removed (the output
                # file, not the input one).
                logging.error("The output file %s is already existing and "
                              "can't be overwritten: %s.",
                              self.output_filename, e)
                return False

        # Note: '-All=' must be followed by a known exiftool option.
        # Also, '-CommonIFD0' is needed for .tiff files
        cmd = [_get_exiftool_path(),
               '-all=',         # remove metadata
               '-adobe=',       # remove adobe-specific metadata
               '-exif:all=',    # remove all exif metadata
               '-Time:All=',    # remove all timestamps
               '-quiet',        # don't show useless logs
               '-CommonIFD0=',  # remove IFD0 metadata
               '-o', self.output_filename,
               self.filename]
        try:
            if self.sandbox:
                bubblewrap.run(cmd, check=True,
                               input_filename=self.filename,
                               output_filename=self.output_filename)
            else:
                subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError as e:  # pragma: no cover
            logging.error("Something went wrong during the processing of %s: %s", self.filename, e)
            return False
        return True
|
|
|
|
|
2022-08-05 20:43:37 +02:00
|
|
|
@functools.lru_cache
def _get_exiftool_path() -> str:  # pragma: no cover
    """ Locate the exiftool executable.

    `shutil.which` is tried first; if it finds nothing, the hard-coded
    `/usr/bin/vendor_perl/exiftool` location is checked for executability.
    The returned path is memoised by `functools.lru_cache`.

    :raises RuntimeError: if exiftool is found in neither place.
    """
    located = shutil.which('exiftool')
    if not located:
        # Exiftool on Arch Linux has a weird path
        arch_location = '/usr/bin/vendor_perl/exiftool'
        if not os.access(arch_location, os.X_OK):
            raise RuntimeError("Unable to find exiftool")
        located = arch_location
    return located
|