Implement support for .avi files, via ffmpeg
- This commit introduces optional dependencies (namely ffmpeg): mat2 will emit a warning when trying to process an .avi file if ffmpeg isn't installed. - Since metadata are obtained via exiftool, this commit also refactors our exiftool wrapper a bit.
This commit is contained in:
parent
2ae5d909c3
commit
e70ea811c9
@ -42,7 +42,7 @@ tests:debian:
|
||||
stage: test
|
||||
script:
|
||||
- apt-get -qqy update
|
||||
- apt-get -qqy install --no-install-recommends python3-mutagen python3-gi-cairo gir1.2-poppler-0.18 gir1.2-gdkpixbuf-2.0 libimage-exiftool-perl python3-coverage
|
||||
- apt-get -qqy install --no-install-recommends python3-mutagen python3-gi-cairo gir1.2-poppler-0.18 gir1.2-gdkpixbuf-2.0 libimage-exiftool-perl python3-coverage ffmpeg
|
||||
- python3-coverage run --branch -m unittest discover -s tests/
|
||||
- python3-coverage report --fail-under=100 -m --include 'libmat2/*'
|
||||
|
||||
@ -62,5 +62,5 @@ tests:archlinux:
|
||||
tags:
|
||||
- whitewhale
|
||||
script:
|
||||
- pacman -Sy --noconfirm python-mutagen python-gobject gdk-pixbuf2 poppler-glib gdk-pixbuf2 python-cairo perl-image-exiftool python-setuptools mailcap
|
||||
- pacman -Sy --noconfirm python-mutagen python-gobject gdk-pixbuf2 poppler-glib gdk-pixbuf2 python-cairo perl-image-exiftool python-setuptools mailcap ffmpeg
|
||||
- python3 setup.py test
|
||||
|
@ -1,11 +1,12 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import collections
|
||||
import enum
|
||||
import importlib
|
||||
from typing import Dict, Optional
|
||||
|
||||
from . import exiftool, video
|
||||
|
||||
# make pyflakes happy
|
||||
assert Dict
|
||||
assert Optional
|
||||
@ -37,24 +38,13 @@ DEPENDENCIES = {
|
||||
'mutagen': 'Mutagen',
|
||||
}
|
||||
|
||||
def _get_exiftool_path() -> str: # pragma: no cover
|
||||
exiftool_path = '/usr/bin/exiftool'
|
||||
if os.path.isfile(exiftool_path):
|
||||
if os.access(exiftool_path, os.X_OK):
|
||||
return exiftool_path
|
||||
|
||||
# ArchLinux
|
||||
exiftool_path = '/usr/bin/vendor_perl/exiftool'
|
||||
if os.path.isfile(exiftool_path):
|
||||
if os.access(exiftool_path, os.X_OK):
|
||||
return exiftool_path
|
||||
|
||||
raise ValueError
|
||||
|
||||
def check_dependencies() -> dict:
|
||||
ret = collections.defaultdict(bool) # type: Dict[str, bool]
|
||||
|
||||
ret['Exiftool'] = True if _get_exiftool_path() else False
|
||||
ret['Exiftool'] = True if exiftool._get_exiftool_path() else False
|
||||
ret['Ffmpeg'] = True if video._get_ffmpeg_path() else False
|
||||
|
||||
for key, value in DEPENDENCIES.items():
|
||||
ret[value] = True
|
||||
|
@ -7,7 +7,8 @@ assert Set # make pyflakes happy
|
||||
|
||||
class AbstractParser(abc.ABC):
|
||||
""" This is the base class of every parser.
|
||||
It might yield `ValueError` on instantiation on invalid files.
|
||||
It might yield `ValueError` on instantiation on invalid files,
|
||||
and `RuntimeError` when something went wrong in `remove_all`.
|
||||
"""
|
||||
meta_list = set() # type: Set[str]
|
||||
mimetypes = set() # type: Set[str]
|
||||
@ -27,4 +28,7 @@ class AbstractParser(abc.ABC):
|
||||
|
||||
@abc.abstractmethod
|
||||
def remove_all(self) -> bool:
|
||||
"""
|
||||
:raises RuntimeError: Raised if the cleaning process went wrong.
|
||||
"""
|
||||
pass # pragma: no cover
|
||||
|
61
libmat2/exiftool.py
Normal file
61
libmat2/exiftool.py
Normal file
@ -0,0 +1,61 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
from typing import Dict, Union, Set
|
||||
|
||||
from . import abstract
|
||||
|
||||
# Make pyflakes happy
|
||||
assert Set
|
||||
|
||||
|
||||
class ExiftoolParser(abstract.AbstractParser):
    """ Base class for the parsers that obtain their metadata via exiftool.

    Exiftool is often the easiest way to get all the metadata
    from a file, hence why several parsers are re-using its `get_meta`
    method.
    """
    # Keys removed from exiftool's output by `get_meta`.
    meta_whitelist = set()  # type: Set[str]

    @staticmethod
    def __handle_problematic_filename(filename: str, callback) -> bytes:
        """ This method takes a filename with a problematic name,
        copies the file under a harmless name in a temporary directory,
        and returns the result of `callback` applied to this copy.

        The temporary directory is removed even if the copy or the
        `callback` raises an exception.
        """
        with tempfile.TemporaryDirectory() as tmpdirname:
            fname = os.path.join(tmpdirname, "temp_file")
            shutil.copy(filename, fname)
            return callback(fname)

    def get_meta(self) -> Dict[str, Union[str, dict]]:
        """ Return the metadata reported by `exiftool -json` for
        self.filename, minus the keys listed in `meta_whitelist`.

        There is no way to escape the leading dash(es) of the current
        self.filename to prevent parameter injections, so we need to take
        care of this: a file whose name doesn't start with a lowercase
        letter, a digit or a slash is handed to exiftool via a temporary
        copy with a harmless name.

        :raises RuntimeError: Raised if exiftool can't be found.
        :raises subprocess.CalledProcessError: Raised if exiftool exits
            with a non-zero status.
        """
        # NOTE(review): self.filename is presumably set by the constructor
        # of `abstract.AbstractParser` -- not visible from here.
        def run_exiftool(path: str) -> bytes:
            return subprocess.check_output([_get_exiftool_path(), '-json', path])

        if re.search('^[a-z0-9/]', self.filename) is None:
            out = self.__handle_problematic_filename(self.filename, run_exiftool)
        else:
            out = run_exiftool(self.filename)

        # exiftool outputs a json list, with one entry per processed file.
        meta = json.loads(out.decode('utf-8'))[0]
        for key in self.meta_whitelist:
            meta.pop(key, None)
        return meta
|
||||
|
||||
def _get_exiftool_path() -> str:  # pragma: no cover
    """ Return the path of the `exiftool` executable.

    The usual location is tried first, then the ArchLinux one, and
    finally the directories listed in `$PATH`.

    :raises RuntimeError: Raised if no executable exiftool is found.
    """
    known_paths = (
        '/usr/bin/exiftool',
        '/usr/bin/vendor_perl/exiftool',  # ArchLinux
    )
    for exiftool_path in known_paths:
        if os.path.isfile(exiftool_path) and os.access(exiftool_path, os.X_OK):
            return exiftool_path

    # Installations outside of the known locations (/usr/local/bin, ...)
    found = shutil.which('exiftool')
    if found:
        return found

    raise RuntimeError("Unable to find exiftool")
|
@ -1,11 +1,6 @@
|
||||
import subprocess
|
||||
import imghdr
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import re
|
||||
from typing import Set, Dict, Union
|
||||
from typing import Set
|
||||
|
||||
import cairo
|
||||
|
||||
@ -13,44 +8,12 @@ import gi
|
||||
gi.require_version('GdkPixbuf', '2.0')
|
||||
from gi.repository import GdkPixbuf
|
||||
|
||||
from . import abstract, _get_exiftool_path
|
||||
from . import exiftool
|
||||
|
||||
# Make pyflakes happy
|
||||
assert Set
|
||||
|
||||
class _ImageParser(abstract.AbstractParser):
|
||||
""" Since we use `exiftool` to get metadata from
|
||||
all images fileformat, `get_meta` is implemented in this class,
|
||||
and all the image-handling ones are inheriting from it."""
|
||||
meta_whitelist = set() # type: Set[str]
|
||||
|
||||
@staticmethod
|
||||
def __handle_problematic_filename(filename: str, callback) -> bytes:
|
||||
""" This method takes a filename with a problematic name,
|
||||
and safely applies it a `callback`."""
|
||||
tmpdirname = tempfile.mkdtemp()
|
||||
fname = os.path.join(tmpdirname, "temp_file")
|
||||
shutil.copy(filename, fname)
|
||||
out = callback(fname)
|
||||
shutil.rmtree(tmpdirname)
|
||||
return out
|
||||
|
||||
def get_meta(self) -> Dict[str, Union[str, dict]]:
|
||||
""" There is no way to escape the leading(s) dash(es) of the current
|
||||
self.filename to prevent parameter injections, so we need to take care
|
||||
of this.
|
||||
"""
|
||||
fun = lambda f: subprocess.check_output([_get_exiftool_path(), '-json', f])
|
||||
if re.search('^[a-z0-9/]', self.filename) is None:
|
||||
out = self.__handle_problematic_filename(self.filename, fun)
|
||||
else:
|
||||
out = fun(self.filename)
|
||||
meta = json.loads(out.decode('utf-8'))[0]
|
||||
for key in self.meta_whitelist:
|
||||
meta.pop(key, None)
|
||||
return meta
|
||||
|
||||
class PNGParser(_ImageParser):
|
||||
class PNGParser(exiftool.ExiftoolParser):
|
||||
mimetypes = {'image/png', }
|
||||
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
|
||||
'Directory', 'FileSize', 'FileModifyDate',
|
||||
@ -77,7 +40,7 @@ class PNGParser(_ImageParser):
|
||||
return True
|
||||
|
||||
|
||||
class GdkPixbufAbstractParser(_ImageParser):
|
||||
class GdkPixbufAbstractParser(exiftool.ExiftoolParser):
|
||||
""" GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
|
||||
this has the side-effect of completely removing metadata.
|
||||
"""
|
||||
|
@ -18,6 +18,8 @@ def __load_all_parsers():
|
||||
continue
|
||||
elif fname.endswith('__init__.py'):
|
||||
continue
|
||||
elif fname.endswith('exiftool.py'):
|
||||
continue
|
||||
basename = os.path.basename(fname)
|
||||
name, _ = os.path.splitext(basename)
|
||||
importlib.import_module('.' + name, package='libmat2')
|
||||
|
58
libmat2/video.py
Normal file
58
libmat2/video.py
Normal file
@ -0,0 +1,58 @@
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from . import exiftool
|
||||
|
||||
|
||||
class AVIParser(exiftool.ExiftoolParser):
    """ Parser for `.avi` files: the metadata are read via exiftool
    (see `exiftool.ExiftoolParser.get_meta`), and removed via ffmpeg.
    """
    mimetypes = {'video/x-msvideo', }
    # Keys of exiftool's output that `get_meta` doesn't report.
    meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
                      'FileSize', 'FileModifyDate', 'FileAccessDate',
                      'FileInodeChangeDate', 'FilePermissions', 'FileType',
                      'FileTypeExtension', 'MIMEType', 'FrameRate', 'MaxDataRate',
                      'FrameCount', 'StreamCount', 'StreamType', 'VideoCodec',
                      'VideoFrameRate', 'VideoFrameCount', 'Quality',
                      'SampleSize', 'BMPVersion', 'ImageWidth', 'ImageHeight',
                      'Planes', 'BitDepth', 'Compression', 'ImageLength',
                      'PixelsPerMeterX', 'PixelsPerMeterY', 'NumColors',
                      'NumImportantColors', 'RedMask', 'GreenMask', 'BlueMask',
                      'AlphaMask', 'ColorSpace', 'AudioCodec', 'AudioCodecRate',
                      'AudioSampleCount', 'AudioSampleRate', 'Encoding',
                      'NumChannels', 'SampleRate', 'AvgBytesPerSec',
                      'BitsPerSample', 'Duration', 'ImageSize', 'Megapixels'}

    @staticmethod
    def _protect_path(path: str) -> str:
        """ Return `path` in a form that can't be mistaken for an option
        on a command line: a leading dash is hidden behind `./`."""
        if path.startswith('-'):
            return './' + path
        return path

    def remove_all(self) -> bool:
        """ Write a copy of self.filename without its metadata
        to self.output_filename, by running ffmpeg.

        :return: True if ffmpeg succeeded, False if it exited with
            a non-zero status.
        :raises RuntimeError: Raised if ffmpeg can't be found.
        """
        # NOTE(review): self.filename and self.output_filename are presumably
        # set by the constructor of the base class -- not visible from here.
        cmd = [_get_ffmpeg_path(),
               '-i', self._protect_path(self.filename),  # input file
               '-y',                     # overwrite existing output file
               '-loglevel', 'panic',     # Don't show log
               '-hide_banner',           # hide the banner
               '-codec', 'copy',         # don't decode anything, just copy (speed!)
               '-map_metadata', '-1',    # remove superficial metadata
               '-map_chapters', '-1',    # remove chapters
               '-fflags', '+bitexact',   # don't add any metadata
               '-flags:v', '+bitexact',  # don't add any metadata
               '-flags:a', '+bitexact',  # don't add any metadata
               self._protect_path(self.output_filename)]

        try:
            subprocess.check_call(cmd)
        except subprocess.CalledProcessError:  # pragma: no cover
            return False

        return True
|
||||
|
||||
|
||||
def _get_ffmpeg_path() -> str:  # pragma: no cover
    """ Return the path of the `ffmpeg` executable.

    The usual location is tried first, then the directories
    listed in `$PATH`.

    :raises RuntimeError: Raised if no executable ffmpeg is found.
    """
    import shutil  # local import: only needed for the fallback below

    ffmpeg_path = '/usr/bin/ffmpeg'
    if os.path.isfile(ffmpeg_path) and os.access(ffmpeg_path, os.X_OK):
        return ffmpeg_path

    # Installations outside of /usr/bin (/usr/local/bin, ...)
    found = shutil.which('ffmpeg')
    if found:
        return found

    raise RuntimeError("Unable to find ffmpeg")
|
8
mat2
8
mat2
@ -97,7 +97,13 @@ def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy)
|
||||
return False
|
||||
p.unknown_member_policy = policy
|
||||
p.lightweight_cleaning = is_lightweight
|
||||
return p.remove_all()
|
||||
|
||||
try:
|
||||
return p.remove_all()
|
||||
except RuntimeError as e:
|
||||
print("[-] %s can't be cleaned: %s" % (filename, e))
|
||||
return False
|
||||
|
||||
|
||||
|
||||
def show_parsers() -> bool:
|
||||
|
BIN
tests/data/dirty.avi
Normal file
BIN
tests/data/dirty.avi
Normal file
Binary file not shown.
@ -6,12 +6,16 @@ import os
|
||||
import zipfile
|
||||
|
||||
from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless
|
||||
from libmat2 import check_dependencies
|
||||
from libmat2 import check_dependencies, video
|
||||
|
||||
|
||||
class TestCheckDependencies(unittest.TestCase):
|
||||
def test_deps(self):
|
||||
ret = check_dependencies()
|
||||
try:
|
||||
ret = check_dependencies()
|
||||
except RuntimeError:
|
||||
return # this happens if not every dependency is installed
|
||||
|
||||
for value in ret.values():
|
||||
self.assertTrue(value)
|
||||
|
||||
@ -471,3 +475,24 @@ class TestCleaning(unittest.TestCase):
|
||||
os.remove('./tests/data/clean.txt')
|
||||
os.remove('./tests/data/clean.cleaned.txt')
|
||||
os.remove('./tests/data/clean.cleaned.cleaned.txt')
|
||||
|
||||
def test_avi(self):
    """ Clean a copy of `dirty.avi`, check that the result has no metadata
    left, and that it can be cleaned again.

    The cleaning part is silently skipped if ffmpeg is not installed.
    """
    shutil.copy('./tests/data/dirty.avi', './tests/data/clean.avi')
    p = video.AVIParser('./tests/data/clean.avi')

    meta = p.get_meta()
    self.assertEqual(meta['Software'], 'MEncoder SVN-r33148-4.0.1')

    try:
        ret = p.remove_all()
    except RuntimeError:
        # this happens if ffmpeg is not installed:
        # don't leave our working copy behind.
        os.remove('./tests/data/clean.avi')
        return
    self.assertTrue(ret)

    p = video.AVIParser('./tests/data/clean.cleaned.avi')
    self.assertEqual(p.get_meta(), {})
    self.assertTrue(p.remove_all())

    os.remove('./tests/data/clean.avi')
    os.remove('./tests/data/clean.cleaned.avi')
    os.remove('./tests/data/clean.cleaned.cleaned.avi')
|
||||
|
Loading…
Reference in New Issue
Block a user