diff --git a/src/parsers/jpg.py b/src/parsers/jpg.py
new file mode 100644
index 0000000..d1a4439
--- /dev/null
+++ b/src/parsers/jpg.py
@@ -0,0 +1,56 @@
+import subprocess
+import json
+
+import gi
+gi.require_version('GdkPixbuf', '2.0')
+from gi.repository import GdkPixbuf
+
+from . import abstract
+
+
+class JPGParser(abstract.AbstractParser):
+    """Parser for JPEG images.
+
+    Metadata is listed with the external ``exiftool`` program and removed
+    by re-encoding the image through GdkPixbuf.
+    """
+
+    # 'image/jpeg' is the registered MIME type and the one Python's
+    # `mimetypes` module reports for .jpg files; the non-standard
+    # 'image/jpg' is kept so existing lookups keep working.
+    mimetypes = {'image/jpeg', 'image/jpg'}
+
+    # exiftool keys that get_meta() leaves out of its result.
+    meta_whitelist = {
+        'SourceFile', 'ExifToolVersion', 'FileName',
+        'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate',
+        "FileInodeChangeDate", 'FilePermissions', 'FileType',
+        'FileTypeExtension', 'MIMEType', 'ImageWidth',
+        'ImageSize', 'BitsPerSample', 'ColorComponents', 'EncodingProcess',
+        'JFIFVersion', 'ResolutionUnit', 'XResolution', 'YCbCrSubSampling',
+        'YResolution', 'Megapixels', 'ImageHeight',
+    }
+
+    def get_meta(self):
+        """Return the exiftool tags of `self.filename` not in `meta_whitelist`.
+
+        Raises `subprocess.CalledProcessError` if exiftool exits with a
+        non-zero status.
+        """
+        # NOTE(review): a filename starting with '-' would be parsed by
+        # exiftool as an option -- confirm callers normalise paths.
+        out = subprocess.check_output(['exiftool', '-json', self.filename])
+        # A single file is passed in, so only the first entry of the JSON
+        # array is used.
+        tags = json.loads(out)[0]
+        return {key: value for key, value in tags.items()
+                if key not in self.meta_whitelist}
+
+    def remove_all(self):
+        """Write a re-encoded copy of the image to `self.output_filename`.
+
+        Returns True once the save call has returned.
+        """
+        pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.filename)
+        pixbuf.savev(self.output_filename, "jpeg", ["quality"], ["100"])
+        return True
diff --git a/tests/data/dirty.jpg b/tests/data/dirty.jpg
new file mode 100644
index 0000000..15ca271
Binary files /dev/null and b/tests/data/dirty.jpg differ
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 9305080..1d31695 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -5,7 +5,7 @@ import shutil
 import os
 
 from src import parsers
-from src.parsers import pdf, png
+from src.parsers import pdf, png, jpg
 
 class TestGetMeta(unittest.TestCase):
     def test_pdf(self):
@@ -20,6 +20,11 @@ class TestGetMeta(unittest.TestCase):
         self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
         self.assertEqual(meta['ModifyDate'], "2018:03:20 21:59:25")
 
+    def test_jpg(self):
+        p = jpg.JPGParser('./tests/data/dirty.jpg')
+        meta = p.get_meta()
+        self.assertEqual(meta['Comment'], 'Created with GIMP')
+
 class TestCleaning(unittest.TestCase):
     def test_pdf(self):
         shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
@@ -51,3 +56,21 @@ class TestCleaning(unittest.TestCase):
         self.assertEqual(p.get_meta(), {})
 
         os.remove('./tests/data/clean.png')
+
+
+    def test_jpg(self):
+        shutil.copy('./tests/data/dirty.jpg', './tests/data/clean.jpg')
+        p = jpg.JPGParser('./tests/data/clean.jpg')
+
+        meta = p.get_meta()
+        self.assertEqual(meta['Comment'], 'Created with GIMP')
+
+        ret = p.remove_all()
+        self.assertTrue(ret)
+
+        p = jpg.JPGParser('./tests/data/clean.jpg.cleaned')
+        self.assertEqual(p.get_meta(), {})
+
+        os.remove('./tests/data/clean.jpg')
+        # The cleaned copy opened above must not be left in the data dir.
+        os.remove('./tests/data/clean.jpg.cleaned')