diff --git a/libmat2/video.py b/libmat2/video.py
index a5029c0..825df92 100644
--- a/libmat2/video.py
+++ b/libmat2/video.py
@@ -9,7 +9,15 @@ from . import exiftool
 
 class AbstractFFmpegParser(exiftool.ExiftoolParser):
     """ Abstract parser for all FFmpeg-based ones, mainly for video. """
+    # Some fileformats have mandatory metadata fields
+    meta_key_value_whitelist = {}  # type: Dict[str, Union[str, int]]
+
     def remove_all(self) -> bool:
+        if self.meta_key_value_whitelist:
+            # Shared by every subclass, so name the parser's own mimetypes.
+            logging.warning('The format of "%s" (%s) has some mandatory '
+                            'metadata fields; mat2 filled them with standard '
+                            'data.', self.filename, ', '.join(self.mimetypes))
         cmd = [_get_ffmpeg_path(),
                '-i', self.filename,      # input file
                '-y',                     # overwrite existing output file
@@ -31,6 +39,44 @@ class AbstractFFmpegParser(exiftool.ExiftoolParser):
             return False
         return True
 
+    def get_meta(self) -> Dict[str, Union[str, dict]]:
+        """ Return the metadata, leaving out the mandatory fields that
+        already hold the value listed in `meta_key_value_whitelist`. """
+        meta = super().get_meta()
+
+        ret = dict()  # type: Dict[str, Union[str, dict]]
+        for key, value in meta.items():
+            if key in self.meta_key_value_whitelist.keys():
+                if value == self.meta_key_value_whitelist[key]:
+                    continue
+            ret[key] = value
+        return ret
+
+
+class WMVParser(AbstractFFmpegParser):
+    """ Parser for WMV (video/x-ms-wmv) files. """
+    mimetypes = {'video/x-ms-wmv', }
+    meta_whitelist = {'AudioChannels', 'AudioCodecID', 'AudioCodecName',
+                      'ErrorCorrectionType', 'AudioSampleRate', 'DataPackets',
+                      'Directory', 'Duration', 'ExifToolVersion',
+                      'FileAccessDate', 'FileInodeChangeDate', 'FileLength',
+                      'FileModifyDate', 'FileName', 'FilePermissions',
+                      'FileSize', 'FileType', 'FileTypeExtension',
+                      'FrameCount', 'FrameRate', 'ImageHeight', 'ImageSize',
+                      'ImageWidth', 'MIMEType', 'MaxBitrate', 'MaxPacketSize',
+                      'Megapixels', 'MinPacketSize', 'Preroll', 'SendDuration',
+                      'SourceFile', 'StreamNumber', 'VideoCodecName', }
+    meta_key_value_whitelist = {  # some metadata are mandatory :/
+        'AudioCodecDescription': '',
+        'CreationDate': '0000:00:00 00:00:00Z',
+        'FileID': '00000000-0000-0000-0000-000000000000',
+        'Flags': 2,  # FIXME: What is this? Why 2?
+        'ModifyDate': '0000:00:00 00:00:00',
+        'TimeOffset': '0 s',
+        'VideoCodecDescription': '',
+        'StreamType': 'Audio',
+    }
+
 
 class AVIParser(AbstractFFmpegParser):
     mimetypes = {'video/x-msvideo', }
@@ -51,6 +97,7 @@ class AVIParser(AbstractFFmpegParser):
                       'SampleRate', 'AvgBytesPerSec', 'BitsPerSample',
                       'Duration', 'ImageSize', 'Megapixels'}
 
+
 class MP4Parser(AbstractFFmpegParser):
     mimetypes = {'video/mp4', }
     meta_whitelist = {'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration',
@@ -84,23 +131,6 @@ class MP4Parser(AbstractFFmpegParser):
         'TrackVolume': '0.00%',
     }
 
-    def remove_all(self) -> bool:
-        logging.warning('The format of "%s" (video/mp4) has some mandatory '
-                        'metadata fields; mat2 filled them with standard data.',
-                        self.filename)
-        return super().remove_all()
-
-    def get_meta(self) -> Dict[str, Union[str, dict]]:
-        meta = super().get_meta()
-
-        ret = dict()  # type: Dict[str, Union[str, dict]]
-        for key, value in meta.items():
-            if key in self.meta_key_value_whitelist.keys():
-                if value == self.meta_key_value_whitelist[key]:
-                    continue
-            ret[key] = value
-        return ret
-
 
 def _get_ffmpeg_path() -> str:  # pragma: no cover
     ffmpeg_path = '/usr/bin/ffmpeg'
diff --git a/tests/data/dirty.wmv b/tests/data/dirty.wmv
new file mode 100644
index 0000000..b007f4c
Binary files /dev/null and b/tests/data/dirty.wmv differ
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index e3072a8..9152b2f 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -165,6 +165,11 @@ class TestGetMeta(unittest.TestCase):
         self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
         os.remove('./tests/data/dirty.zip')
 
+    def test_wmv(self):
+        p, mimetype = parser_factory.get_parser('./tests/data/dirty.wmv')
+        self.assertEqual(mimetype, 'video/x-ms-wmv')
+        meta = p.get_meta()
+        self.assertEqual(meta['EncodingSettings'], 'Lavf52.103.0')
 
 class TestRemovingThumbnails(unittest.TestCase):
     def test_odt(self):
@@ -544,3 +549,26 @@ class TestCleaning(unittest.TestCase):
         os.remove('./tests/data/clean.mp4')
         os.remove('./tests/data/clean.cleaned.mp4')
         os.remove('./tests/data/clean.cleaned.cleaned.mp4')
+
+    def test_wmv(self):
+        try:
+            video._get_ffmpeg_path()
+        except RuntimeError:
+            raise unittest.SkipTest
+
+        shutil.copy('./tests/data/dirty.wmv', './tests/data/clean.wmv')
+        p = video.WMVParser('./tests/data/clean.wmv')
+
+        meta = p.get_meta()
+        self.assertEqual(meta['EncodingSettings'], 'Lavf52.103.0')
+
+        ret = p.remove_all()
+        self.assertTrue(ret)
+
+        p = video.WMVParser('./tests/data/clean.cleaned.wmv')
+        self.assertNotIn('EncodingSettings', p.get_meta())
+        self.assertTrue(p.remove_all())
+
+        os.remove('./tests/data/clean.wmv')
+        os.remove('./tests/data/clean.cleaned.wmv')
+        os.remove('./tests/data/clean.cleaned.cleaned.wmv')