2019-05-16 00:31:40 +02:00
|
|
|
import functools
|
2018-10-18 19:19:56 +02:00
|
|
|
import json
|
2018-10-23 16:14:21 +02:00
|
|
|
import logging
|
2018-10-18 19:19:56 +02:00
|
|
|
import os
|
2019-10-13 01:13:49 +02:00
|
|
|
import subprocess
|
2018-10-22 19:12:39 +02:00
|
|
|
from typing import Dict, Union, Set
|
2018-10-18 19:19:56 +02:00
|
|
|
|
|
|
|
from . import abstract
|
2019-10-13 01:13:49 +02:00
|
|
|
from . import bubblewrap
|
2018-10-18 19:19:56 +02:00
|
|
|
|
|
|
|
# Make pyflakes happy: `Set` is only referenced from a type comment
|
|
|
|
assert Set
|
|
|
|
|
|
|
|
|
|
|
|
class ExiftoolParser(abstract.AbstractParser):
    """ Exiftool is often the easiest way to get all the metadata
    from a file, hence why several parsers are re-using its `get_meta`
    method.
    """
    # Keys removed from exiftool's output before `get_meta` returns it.
    meta_allowlist = set()  # type: Set[str]

    def get_meta(self) -> Dict[str, Union[str, dict]]:
        """Return the metadata that exiftool reports for `self.filename`.

        Exiftool is invoked with `-json`, through bubblewrap when
        `self.sandbox` is truthy and directly otherwise. The first object
        of the decoded JSON output is returned, minus every key listed in
        `meta_allowlist`.

        :raises ValueError: if exiftool exits with a non-zero status.
        """
        cmd = [_get_exiftool_path(), '-json', self.filename]
        try:
            if self.sandbox:
                out = bubblewrap.run(cmd,
                                     input_filename=self.filename,
                                     check=True, stdout=subprocess.PIPE).stdout
            else:
                out = subprocess.run(cmd, check=True,
                                     stdout=subprocess.PIPE).stdout
        except subprocess.CalledProcessError as e:  # pragma: no cover
            # Keep the historical exception type, but chain the cause so
            # the failing command stays visible in tracebacks.
            raise ValueError from e

        # exiftool emits a JSON array; only its first object is used here.
        meta = json.loads(out.decode('utf-8'))[0]
        for key in self.meta_allowlist:
            meta.pop(key, None)
        return meta

    def _lightweight_cleanup(self) -> bool:
        """Write a metadata-stripped copy of `self.filename` with exiftool.

        The result is written to `self.output_filename`; a pre-existing
        file at that location is removed first.

        :return: True on success, False if the stale output file could not
                 be removed or if exiftool exited with a non-zero status
                 (both cases are logged).
        """
        if os.path.exists(self.output_filename):
            try:  # exiftool can't force output to existing files
                os.remove(self.output_filename)
            except OSError as e:  # pragma: no cover
                # Report the file that actually blocks us (the output, not
                # the input), and build the message from adjacent literals
                # so no source indentation leaks into the log line.
                logging.error("The output file %s is already existing and "
                              "can't be overwritten: %s.",
                              self.output_filename, e)
                return False

        # Note: '-All=' must be followed by a known exiftool option.
        # Also, '-CommonIFD0' is needed for .tiff files
        cmd = [_get_exiftool_path(),
               '-all=',         # remove metadata
               '-adobe=',       # remove adobe-specific metadata
               '-exif:all=',    # remove all exif metadata
               '-Time:All=',    # remove all timestamps
               '-quiet',        # don't show useless logs
               '-CommonIFD0=',  # remove IFD0 metadata
               '-o', self.output_filename,
               self.filename]
        try:
            if self.sandbox:
                bubblewrap.run(cmd, check=True,
                               input_filename=self.filename,
                               output_filename=self.output_filename)
            else:
                subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError as e:  # pragma: no cover
            logging.error("Something went wrong during the processing of %s: %s", self.filename, e)
            return False
        return True
|
|
|
|
|
2019-05-16 00:31:40 +02:00
|
|
|
@functools.lru_cache()
def _get_exiftool_path() -> str:  # pragma: no cover
    """Return the path of the exiftool executable.

    The known install locations are probed in a fixed order and the first
    one that is a regular file with the executable bit wins. Successful
    lookups are memoized by `functools.lru_cache`.

    :raises RuntimeError: if none of the candidates is usable.
    """
    # A tuple rather than a set: set iteration order over strings changes
    # with hash randomisation, which made the selected binary vary between
    # runs when several candidates were installed.
    candidates = (
        '/usr/bin/exiftool',  # debian/fedora
        '/usr/bin/vendor_perl/exiftool',  # archlinux
    )

    for candidate in candidates:
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return candidate

    raise RuntimeError("Unable to find exiftool")
|