1
0
Fork 0

Improve reliability against corrupted image files

This commit is contained in:
jvoisin 2018-06-22 20:38:29 +02:00
parent dfccf79f22
commit b4ef0c9622
2 changed files with 25 additions and 2 deletions

View File

@ -1,4 +1,5 @@
import subprocess
import imghdr
import json
import os
import shutil
@ -68,6 +69,8 @@ class GdkPixbufAbstractParser(__ImageParser):
""" GdkPixbuf can handle a lot of surfaces, so we're rendering images on it,
this has the side-effect of removing metadata completely.
"""
_type = ''
def remove_all(self):
_, extension = os.path.splitext(self.filename)
pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.filename)
@ -76,8 +79,14 @@ class GdkPixbufAbstractParser(__ImageParser):
pixbuf.savev(self.output_filename, extension[1:], [], [])
return True
def __init__(self, filename):
    """ Initialise the parser, then check that `filename` is an image of
    the type this parser is meant for.

    The type sniffed by `imghdr.what` is compared against the class-level
    `_type` attribute; on a mismatch, a ValueError is raised.
    """
    super().__init__(filename)
    detected = imghdr.what(filename)
    if detected != self._type:  # better safe than sorry
        # Say which file was rejected and why, so that the failure can be
        # diagnosed without re-running the detection by hand.
        raise ValueError('%s: expected image type %r, but detected %r'
                         % (filename, self._type, detected))
class JPGParser(GdkPixbufAbstractParser):
_type = 'jpeg'
mimetypes = {'image/jpeg'}
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
'Directory', 'FileSize', 'FileModifyDate',
@ -90,6 +99,7 @@ class JPGParser(GdkPixbufAbstractParser):
class TiffParser(GdkPixbufAbstractParser):
_type = 'tiff'
mimetypes = {'image/tiff'}
meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
'FillOrder', 'PhotometricInterpretation',
@ -103,6 +113,7 @@ class TiffParser(GdkPixbufAbstractParser):
class BMPParser(GdkPixbufAbstractParser):
_type = 'bmp'
mimetypes = {'image/x-ms-bmp'}
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
'FileSize', 'FileModifyDate', 'FileAccessDate',

View File

@ -87,13 +87,25 @@ class TestCorruptedFiles(unittest.TestCase):
f.write("trailing garbage")
p = torrent.TorrentParser('./tests/data/clean.torrent')
self.assertEqual(p.get_meta(), expected)
os.remove('./tests/data/clean.torrent')
def test_odg(self):
    # A copy of dirty.png carrying an .odg extension must be refused by
    # LibreOfficeParser with a ValueError.
    fake_odg = './tests/data/clean.odg'
    shutil.copy('./tests/data/dirty.png', fake_odg)
    # Registered right after the copy, so the fixture is deleted even when
    # the assertion below fails.
    self.addCleanup(os.remove, fake_odg)
    with self.assertRaises(ValueError):
        office.LibreOfficeParser(fake_odg)
def test_bmp(self):
    # A copy of dirty.png carrying a .bmp extension must be refused by
    # BMPParser with a ValueError.
    fake_bmp = './tests/data/clean.bmp'
    shutil.copy('./tests/data/dirty.png', fake_bmp)
    # Registered right after the copy, so the fixture is deleted even when
    # the assertion below fails.
    self.addCleanup(os.remove, fake_bmp)
    with self.assertRaises(ValueError):
        images.BMPParser(fake_bmp)
def test_docx(self):
    # A copy of dirty.png carrying a .docx extension must be refused by
    # MSOfficeParser with a ValueError.
    fake_docx = './tests/data/clean.docx'
    shutil.copy('./tests/data/dirty.png', fake_docx)
    # Registered right after the copy, so the fixture is deleted even when
    # the assertion below fails.
    self.addCleanup(os.remove, fake_docx)
    with self.assertRaises(ValueError):
        office.MSOfficeParser(fake_docx)
class TestGetMeta(unittest.TestCase):
def test_pdf(self):
@ -123,7 +135,7 @@ class TestGetMeta(unittest.TestCase):
self.assertEqual(meta['Comment'], 'Created with GIMP')
def test_tiff(self):
p = images.JPGParser('./tests/data/dirty.tiff')
p = images.TiffParser('./tests/data/dirty.tiff')
meta = p.get_meta()
self.assertEqual(meta['Make'], 'OLYMPUS IMAGING CORP.')
self.assertEqual(meta['Model'], 'C7070WZ')