diff --git a/libmat2/video.py b/libmat2/video.py index 85b5b2e..a5029c0 100644 --- a/libmat2/video.py +++ b/libmat2/video.py @@ -2,10 +2,37 @@ import os import subprocess import logging +from typing import Dict, Union + from . import exiftool -class AVIParser(exiftool.ExiftoolParser): +class AbstractFFmpegParser(exiftool.ExiftoolParser): + """ Abstract parser for all FFmpeg-based ones, mainly for video. """ + def remove_all(self) -> bool: + cmd = [_get_ffmpeg_path(), + '-i', self.filename, # input file + '-y', # overwrite existing output file + '-map', '0', # copy all streams from input to output + '-codec', 'copy', # don't decode anything, just copy (speed!) + '-loglevel', 'panic', # Don't show log + '-hide_banner', # hide the banner + '-map_metadata', '-1', # remove superficial metadata + '-map_chapters', '-1', # remove chapters + '-disposition', '0', # Remove dispositions (check ffmpeg's manpage) + '-fflags', '+bitexact', # don't add any metadata + '-flags:v', '+bitexact', # don't add any metadata + '-flags:a', '+bitexact', # don't add any metadata + self.output_filename] + try: + subprocess.check_call(cmd) + except subprocess.CalledProcessError as e: + logging.error("Something went wrong during the processing of %s: %s", self.filename, e) + return False + return True + + +class AVIParser(AbstractFFmpegParser): mimetypes = {'video/x-msvideo', } meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate', @@ -24,25 +51,55 @@ class AVIParser(exiftool.ExiftoolParser): 'SampleRate', 'AvgBytesPerSec', 'BitsPerSample', 'Duration', 'ImageSize', 'Megapixels'} +class MP4Parser(AbstractFFmpegParser): + mimetypes = {'video/mp4', } + meta_whitelist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration', + 'XResolution', 'YResolution', 'ExifToolVersion', + 'FileAccessDate', 'FileInodeChangeDate', 'FileModifyDate', + 'FileName', 'FilePermissions', 'MIMEType', 'FileType', + 'FileTypeExtension', 
'Directory', 'ImageWidth', + 'ImageSize', 'ImageHeight', 'FileSize', 'SourceFile', + 'BitDepth', 'Duration', 'AudioChannels', + 'AudioBitsPerSample', 'AudioSampleRate', 'Megapixels', + 'MovieDataSize', 'VideoFrameRate', 'MediaTimeScale', + 'SourceImageHeight', 'SourceImageWidth', + 'MatrixStructure', 'MediaDuration'} + meta_key_value_whitelist = { # some metadata are mandatory :/ + 'CreateDate': '0000:00:00 00:00:00', + 'CurrentTime': '0 s', + 'MediaCreateDate': '0000:00:00 00:00:00', + 'MediaLanguageCode': 'und', + 'MediaModifyDate': '0000:00:00 00:00:00', + 'ModifyDate': '0000:00:00 00:00:00', + 'OpColor': '0 0 0', + 'PosterTime': '0 s', + 'PreferredRate': '1', + 'PreferredVolume': '100.00%', + 'PreviewDuration': '0 s', + 'PreviewTime': '0 s', + 'SelectionDuration': '0 s', + 'SelectionTime': '0 s', + 'TrackCreateDate': '0000:00:00 00:00:00', + 'TrackModifyDate': '0000:00:00 00:00:00', + 'TrackVolume': '0.00%', + } + def remove_all(self) -> bool: - cmd = [_get_ffmpeg_path(), - '-i', self.filename, # input file - '-y', # overwrite existing output file - '-loglevel', 'panic', # Don't show log - '-hide_banner', # hide the banner - '-codec', 'copy', # don't decode anything, just copy (speed!) 
- '-map_metadata', '-1', # remove supperficial metadata - '-map_chapters', '-1', # remove chapters - '-fflags', '+bitexact', # don't add any metadata - '-flags:v', '+bitexact', # don't add any metadata - '-flags:a', '+bitexact', # don't add any metadata - self.output_filename] - try: - subprocess.check_call(cmd) - except subprocess.CalledProcessError as e: - logging.error("Something went wrong during the processing of %s: %s", self.filename, e) - return False - return True + logging.warning('The format of "%s" (video/mp4) has some mandatory ' + 'metadata fields; mat2 filled them with standard data.', + self.filename) + return super().remove_all() + + def get_meta(self) -> Dict[str, Union[str, dict]]: + meta = super().get_meta() + + ret = dict() # type: Dict[str, Union[str, dict]] + for key, value in meta.items(): + if key in self.meta_key_value_whitelist.keys(): + if value == self.meta_key_value_whitelist[key]: + continue + ret[key] = value + return ret def _get_ffmpeg_path() -> str: # pragma: no cover diff --git a/mat2 b/mat2 index be63829..a36f62d 100755 --- a/mat2 +++ b/mat2 @@ -20,6 +20,8 @@ __version__ = '0.5.0' assert Tuple assert Union +logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) + def __check_file(filename: str, mode: int=os.R_OK) -> bool: if not os.path.exists(filename): diff --git a/tests/data/dirty.mp4 b/tests/data/dirty.mp4 new file mode 100644 index 0000000..1fc4788 Binary files /dev/null and b/tests/data/dirty.mp4 differ diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py index 1602480..e3072a8 100644 --- a/tests/test_libmat2.py +++ b/tests/test_libmat2.py @@ -521,3 +521,26 @@ class TestCleaning(unittest.TestCase): os.remove('./tests/data/dirty.cleaned.zip') os.remove('./tests/data/dirty.cleaned.cleaned.zip') + + def test_mp4(self): + try: + video._get_ffmpeg_path() + except RuntimeError: + raise unittest.SkipTest + + shutil.copy('./tests/data/dirty.mp4', './tests/data/clean.mp4') + p = 
video.MP4Parser('./tests/data/clean.mp4') + + meta = p.get_meta() + self.assertEqual(meta['Encoder'], 'HandBrake 0.9.4 2009112300') + + ret = p.remove_all() + self.assertTrue(ret) + + p = video.MP4Parser('./tests/data/clean.cleaned.mp4') + self.assertNotIn('Encoder', p.get_meta()) + self.assertTrue(p.remove_all()) + + os.remove('./tests/data/clean.mp4') + os.remove('./tests/data/clean.cleaned.mp4') + os.remove('./tests/data/clean.cleaned.cleaned.mp4')