diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 10e0009..f74cc9e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -42,6 +42,17 @@ tests:debian: script: - apt-get -qqy update - apt-get -qqy install --no-install-recommends python3-mutagen python3-gi-cairo gir1.2-poppler-0.18 gir1.2-gdkpixbuf-2.0 libimage-exiftool-perl python3-coverage ffmpeg + - apt-get -qqy purge bubblewrap + - python3-coverage run --branch -m unittest discover -s tests/ + - python3-coverage report --fail-under=90 -m --include 'libmat2/*' + +tests:debian_with_bubblewrap: + stage: test + tags: + - whitewhale + script: + - apt-get -qqy update + - apt-get -qqy install --no-install-recommends python3-mutagen python3-gi-cairo gir1.2-poppler-0.18 gir1.2-gdkpixbuf-2.0 libimage-exiftool-perl python3-coverage ffmpeg bubblewrap - python3-coverage run --branch -m unittest discover -s tests/ - python3-coverage report --fail-under=100 -m --include 'libmat2/*' diff --git a/INSTALL.md b/INSTALL.md index a2b9230..53264c2 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -9,9 +9,13 @@ installed like this: pip3 install mat2 ``` - # GNU/Linux +## Optional dependencies + +When [bubblewrap](https://github.com/projectatomic/bubblewrap) is +installed, MAT2 uses it to sandbox any external processes it invokes. + ## Fedora Thanks to [atenart](https://ack.tf/), there is a package available on diff --git a/libmat2/__init__.py b/libmat2/__init__.py index 58cf976..be7067b 100644 --- a/libmat2/__init__.py +++ b/libmat2/__init__.py @@ -39,12 +39,11 @@ DEPENDENCIES = { } - def check_dependencies() -> Dict[str, bool]: ret = collections.defaultdict(bool) # type: Dict[str, bool] - ret['Exiftool'] = True if exiftool._get_exiftool_path() else False - ret['Ffmpeg'] = True if video._get_ffmpeg_path() else False + ret['Exiftool'] = bool(exiftool._get_exiftool_path()) + ret['Ffmpeg'] = bool(video._get_ffmpeg_path()) for key, value in DEPENDENCIES.items(): ret[value] = True @@ -55,6 +54,7 @@ def check_dependencies() -> Dict[str, bool]: return ret + @enum.unique class UnknownMemberPolicy(enum.Enum): ABORT = 'abort' diff --git a/libmat2/abstract.py b/libmat2/abstract.py index 9b510f6..aaf00d7 100644 --- a/libmat2/abstract.py +++ b/libmat2/abstract.py @@ -37,4 +37,5 @@ class AbstractParser(abc.ABC): """ :raises RuntimeError: Raised if the cleaning process went wrong. """ + # pylint: disable=unnecessary-pass pass # pragma: no cover diff --git a/libmat2/archive.py b/libmat2/archive.py index bcf8d33..b2483fc 100644 --- a/libmat2/archive.py +++ b/libmat2/archive.py @@ -132,7 +132,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): logging.warning("In file %s, keeping unknown element %s (format: %s)", self.filename, item.filename, mtype) else: - logging.error("In file %s, element %s's format (%s) " + + logging.error("In file %s, element %s's format (%s) " \ "isn't supported", self.filename, item.filename, mtype) abort = True diff --git a/libmat2/exiftool.py b/libmat2/exiftool.py index adec28e..db92f60 100644 --- a/libmat2/exiftool.py +++ b/libmat2/exiftool.py @@ -1,10 +1,10 @@ import json import logging import os -import subprocess from typing import Dict, Union, Set from . import abstract +from . import subprocess # Make pyflakes happy assert Set @@ -18,7 +18,9 @@ class ExiftoolParser(abstract.AbstractParser): meta_whitelist = set() # type: Set[str] def get_meta(self) -> Dict[str, Union[str, dict]]: - out = subprocess.check_output([_get_exiftool_path(), '-json', self.filename]) + out = subprocess.run([_get_exiftool_path(), '-json', self.filename], + input_filename=self.filename, + check=True, stdout=subprocess.PIPE).stdout meta = json.loads(out.decode('utf-8'))[0] for key in self.meta_whitelist: meta.pop(key, None) @@ -46,7 +48,9 @@ class ExiftoolParser(abstract.AbstractParser): '-o', self.output_filename, self.filename] try: - subprocess.check_call(cmd) + subprocess.run(cmd, check=True, + input_filename=self.filename, + output_filename=self.output_filename) except subprocess.CalledProcessError as e: # pragma: no cover logging.error("Something went wrong during the processing of %s: %s", self.filename, e) return False diff --git a/libmat2/office.py b/libmat2/office.py index e6370e7..365c230 100644 --- a/libmat2/office.py +++ b/libmat2/office.py @@ -266,7 +266,6 @@ class MSOfficeParser(ArchiveBasedAbstractParser): f.write(b'') f.write(b'') - if self.__remove_rsid(full_path) is False: return False diff --git a/libmat2/parser_factory.py b/libmat2/parser_factory.py index 4a0ca0d..30c3b52 100644 --- a/libmat2/parser_factory.py +++ b/libmat2/parser_factory.py @@ -10,6 +10,7 @@ assert Tuple # make pyflakes happy T = TypeVar('T', bound='abstract.AbstractParser') + def __load_all_parsers(): """ Loads every parser in a dynamic way """ current_dir = os.path.dirname(__file__) @@ -24,8 +25,10 @@ def __load_all_parsers(): name, _ = os.path.splitext(basename) importlib.import_module('.' + name, package='libmat2') + __load_all_parsers() + def _get_parsers() -> List[T]: """ Get all our parsers!""" def __get_parsers(cls): diff --git a/libmat2/subprocess.py b/libmat2/subprocess.py new file mode 100644 index 0000000..25646f8 --- /dev/null +++ b/libmat2/subprocess.py @@ -0,0 +1,100 @@ +""" +Wrapper around a subset of the subprocess module, +that uses bwrap (bubblewrap) when it is available. + +Instead of importing subprocess, other modules should use this as follows: + + from . import subprocess +""" + +import os +import shutil +import subprocess +import tempfile +from typing import List, Optional + + +__all__ = ['PIPE', 'run', 'CalledProcessError'] +PIPE = subprocess.PIPE +CalledProcessError = subprocess.CalledProcessError + + +def _get_bwrap_path() -> str: + bwrap_path = '/usr/bin/bwrap' + if os.path.isfile(bwrap_path): + if os.access(bwrap_path, os.X_OK): + return bwrap_path + + raise RuntimeError("Unable to find bwrap") # pragma: no cover + + +# pylint: disable=bad-whitespace +def _get_bwrap_args(tempdir: str, + input_filename: str, + output_filename: Optional[str] = None) -> List[str]: + cwd = os.getcwd() + + # XXX: use --ro-bind-try once all supported platforms + # have a bubblewrap recent enough to support it. + ro_bind_dirs = ['/usr', '/lib', '/lib64', '/bin', '/sbin', cwd] + ro_bind_args = [] + for bind_dir in ro_bind_dirs: + if os.path.isdir(bind_dir): # pragma: no cover + ro_bind_args.extend(['--ro-bind', bind_dir, bind_dir]) + + args = ro_bind_args + \ + ['--dev', '/dev', + '--chdir', cwd, + '--unshare-all', + '--new-session', + # XXX: enable --die-with-parent once all supported platforms have + # a bubblewrap recent enough to support it. + # '--die-with-parent', + ] + + if output_filename: + # Mount an empty temporary directory where the sandboxed + # process will create its output file + output_dirname = os.path.dirname(os.path.abspath(output_filename)) + args.extend(['--bind', tempdir, output_dirname]) + + absolute_input_filename = os.path.abspath(input_filename) + args.extend(['--ro-bind', absolute_input_filename, absolute_input_filename]) + + return args + + +# pylint: disable=bad-whitespace +def run(args: List[str], + input_filename: str, + output_filename: Optional[str] = None, + **kwargs) -> subprocess.CompletedProcess: + """Wrapper around `subprocess.run`, that uses bwrap (bubblewrap) if it + is available. + + Extra supported keyword arguments: + + - `input_filename`, made available read-only in the sandbox + - `output_filename`, where the file created by the sandboxed process + is copied upon successful completion; an empty temporary directory + is made visible as the parent directory of this file in the sandbox. + Optional: one valid use case is to invoke an external process + to inspect metadata present in a file. + """ + try: + bwrap_path = _get_bwrap_path() + except RuntimeError: # pragma: no cover + # bubblewrap is not installed ⇒ short-circuit + return subprocess.run(args, **kwargs) + + with tempfile.TemporaryDirectory() as tempdir: + prefix_args = [bwrap_path] + \ + _get_bwrap_args(input_filename=input_filename, + output_filename=output_filename, + tempdir=tempdir) + completed_process = subprocess.run(prefix_args + args, **kwargs) + if output_filename and completed_process.returncode == 0: + shutil.copy(os.path.join(tempdir, os.path.basename(output_filename)), + output_filename) + + return completed_process diff --git a/libmat2/torrent.py b/libmat2/torrent.py index 4d6c1e0..c006f9c 100644 --- a/libmat2/torrent.py +++ b/libmat2/torrent.py @@ -3,6 +3,7 @@ from typing import Union, Tuple, Dict from . import abstract + class TorrentParser(abstract.AbstractParser): mimetypes = {'application/x-bittorrent', } whitelist = {b'announce', b'announce-list', b'info'} @@ -32,7 +33,7 @@ class TorrentParser(abstract.AbstractParser): return True -class _BencodeHandler(object): +class _BencodeHandler(): """ Since bencode isn't that hard to parse, MAT2 comes with its own parser, based on the spec diff --git a/libmat2/video.py b/libmat2/video.py index 825df92..9dc13e1 100644 --- a/libmat2/video.py +++ b/libmat2/video.py @@ -1,10 +1,10 @@ import os -import subprocess import logging from typing import Dict, Union from . import exiftool +from . import subprocess class AbstractFFmpegParser(exiftool.ExiftoolParser): @@ -32,7 +32,9 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser): '-flags:a', '+bitexact', # don't add any metadata self.output_filename] try: - subprocess.check_call(cmd) + subprocess.run(cmd, check=True, + input_filename=self.filename, + output_filename=self.output_filename) except subprocess.CalledProcessError as e: logging.error("Something went wrong during the processing of %s: %s", self.filename, e) return False