From b4ef0c9622a0741bcfa0da1f65d9082251fb4107 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 22 Jun 2018 20:38:29 +0200 Subject: [PATCH] Improve reliability against corrupted image files --- libmat2/images.py | 11 +++++++++++ tests/test_libmat2.py | 16 ++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/libmat2/images.py b/libmat2/images.py index 03718e6..a7a9cad 100644 --- a/libmat2/images.py +++ b/libmat2/images.py @@ -1,4 +1,5 @@ import subprocess +import imghdr import json import os import shutil @@ -68,6 +69,8 @@ class GdkPixbufAbstractParser(__ImageParser): """ GdkPixbuf can handle a lot of surfaces, so we're rending images on it, this has the side-effect of removing metadata completely. """ + _type = '' + def remove_all(self): _, extension = os.path.splitext(self.filename) pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.filename) @@ -76,8 +79,14 @@ class GdkPixbufAbstractParser(__ImageParser): pixbuf.savev(self.output_filename, extension[1:], [], []) return True + def __init__(self, filename): + super().__init__(filename) + if imghdr.what(filename) != self._type: # better safe than sorry + raise ValueError + class JPGParser(GdkPixbufAbstractParser): + _type = 'jpeg' mimetypes = {'image/jpeg'} meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', 'FileSize', 'FileModifyDate', @@ -90,6 +99,7 @@ class JPGParser(GdkPixbufAbstractParser): class TiffParser(GdkPixbufAbstractParser): + _type = 'tiff' mimetypes = {'image/tiff'} meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples', 'FillOrder', 'PhotometricInterpretation', @@ -103,6 +113,7 @@ class TiffParser(GdkPixbufAbstractParser): class BMPParser(GdkPixbufAbstractParser): + _type = 'bmp' mimetypes = {'image/x-ms-bmp'} meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', 'FileSize', 'FileModifyDate', 'FileAccessDate', diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py index e1d949d..0df333d 100644 --- a/tests/test_libmat2.py +++ b/tests/test_libmat2.py @@ -87,13 +87,25 @@ class TestCorruptedFiles(unittest.TestCase): f.write("trailing garbage") p = torrent.TorrentParser('./tests/data/clean.torrent') self.assertEqual(p.get_meta(), expected) - os.remove('./tests/data/clean.torrent') def test_odg(self): shutil.copy('./tests/data/dirty.png', './tests/data/clean.odg') with self.assertRaises(ValueError): office.LibreOfficeParser('./tests/data/clean.odg') + os.remove('./tests/data/clean.odg') + + def test_bmp(self): + shutil.copy('./tests/data/dirty.png', './tests/data/clean.bmp') + with self.assertRaises(ValueError): + p = images.BMPParser('./tests/data/clean.bmp') + os.remove('./tests/data/clean.bmp') + + def test_docx(self): + shutil.copy('./tests/data/dirty.png', './tests/data/clean.docx') + with self.assertRaises(ValueError): + p = office.MSOfficeParser('./tests/data/clean.docx') + os.remove('./tests/data/clean.docx') class TestGetMeta(unittest.TestCase): def test_pdf(self): @@ -123,7 +135,7 @@ class TestGetMeta(unittest.TestCase): self.assertEqual(meta['Comment'], 'Created with GIMP') def test_tiff(self): - p = images.JPGParser('./tests/data/dirty.tiff') + p = images.TiffParser('./tests/data/dirty.tiff') meta = p.get_meta() self.assertEqual(meta['Make'], 'OLYMPUS IMAGING CORP.') self.assertEqual(meta['Model'], 'C7070WZ')