1
0
mirror of synced 2024-06-15 10:39:52 +02:00

Improve reliability against corrupted image files

This commit is contained in:
jvoisin 2018-06-22 20:38:29 +02:00
parent dfccf79f22
commit b4ef0c9622
2 changed files with 25 additions and 2 deletions

View File

@ -1,4 +1,5 @@
import subprocess import subprocess
import imghdr
import json import json
import os import os
import shutil import shutil
@ -68,6 +69,8 @@ class GdkPixbufAbstractParser(__ImageParser):
""" GdkPixbuf can handle a lot of surfaces, so we're rending images on it, """ GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
this has the side-effect of removing metadata completely. this has the side-effect of removing metadata completely.
""" """
_type = ''
def remove_all(self): def remove_all(self):
_, extension = os.path.splitext(self.filename) _, extension = os.path.splitext(self.filename)
pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.filename) pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.filename)
@ -76,8 +79,14 @@ class GdkPixbufAbstractParser(__ImageParser):
pixbuf.savev(self.output_filename, extension[1:], [], []) pixbuf.savev(self.output_filename, extension[1:], [], [])
return True return True
def __init__(self, filename):
    """ Initialise the parser, refusing files whose content is not of the
    image type this parser class declares in `_type`.

    :param filename: path of the image file to handle.
    :raises ValueError: when the type reported by `imghdr.what` for
        `filename` differs from `self._type`.
    """
    super().__init__(filename)
    detected = imghdr.what(filename)
    if detected != self._type:  # better safe than sorry
        # Say what was found, so a rejected file can be diagnosed
        # without re-running the detection by hand.
        raise ValueError("%s: detected image type %r, expected %r"
                         % (filename, detected, self._type))
class JPGParser(GdkPixbufAbstractParser): class JPGParser(GdkPixbufAbstractParser):
_type = 'jpeg'
mimetypes = {'image/jpeg'} mimetypes = {'image/jpeg'}
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
'Directory', 'FileSize', 'FileModifyDate', 'Directory', 'FileSize', 'FileModifyDate',
@ -90,6 +99,7 @@ class JPGParser(GdkPixbufAbstractParser):
class TiffParser(GdkPixbufAbstractParser): class TiffParser(GdkPixbufAbstractParser):
_type = 'tiff'
mimetypes = {'image/tiff'} mimetypes = {'image/tiff'}
meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples', meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
'FillOrder', 'PhotometricInterpretation', 'FillOrder', 'PhotometricInterpretation',
@ -103,6 +113,7 @@ class TiffParser(GdkPixbufAbstractParser):
class BMPParser(GdkPixbufAbstractParser): class BMPParser(GdkPixbufAbstractParser):
_type = 'bmp'
mimetypes = {'image/x-ms-bmp'} mimetypes = {'image/x-ms-bmp'}
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
'FileSize', 'FileModifyDate', 'FileAccessDate', 'FileSize', 'FileModifyDate', 'FileAccessDate',

View File

@ -87,13 +87,25 @@ class TestCorruptedFiles(unittest.TestCase):
f.write("trailing garbage") f.write("trailing garbage")
p = torrent.TorrentParser('./tests/data/clean.torrent') p = torrent.TorrentParser('./tests/data/clean.torrent')
self.assertEqual(p.get_meta(), expected) self.assertEqual(p.get_meta(), expected)
os.remove('./tests/data/clean.torrent') os.remove('./tests/data/clean.torrent')
def test_odg(self): def test_odg(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.odg') shutil.copy('./tests/data/dirty.png', './tests/data/clean.odg')
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
office.LibreOfficeParser('./tests/data/clean.odg') office.LibreOfficeParser('./tests/data/clean.odg')
os.remove('./tests/data/clean.odg')
def test_bmp(self):
    """ A PNG file copied to a `.bmp` name must make BMPParser raise
    ValueError.
    """
    disguised = './tests/data/clean.bmp'
    shutil.copy('./tests/data/dirty.png', disguised)
    # Registered right after the copy so the fixture is removed even when
    # the assertion below fails.
    self.addCleanup(os.remove, disguised)
    with self.assertRaises(ValueError):
        images.BMPParser(disguised)
def test_docx(self):
    """ A PNG file copied to a `.docx` name must make MSOfficeParser raise
    ValueError.
    """
    disguised = './tests/data/clean.docx'
    shutil.copy('./tests/data/dirty.png', disguised)
    # Registered right after the copy so the fixture is removed even when
    # the assertion below fails.
    self.addCleanup(os.remove, disguised)
    with self.assertRaises(ValueError):
        office.MSOfficeParser(disguised)
class TestGetMeta(unittest.TestCase): class TestGetMeta(unittest.TestCase):
def test_pdf(self): def test_pdf(self):
@ -123,7 +135,7 @@ class TestGetMeta(unittest.TestCase):
self.assertEqual(meta['Comment'], 'Created with GIMP') self.assertEqual(meta['Comment'], 'Created with GIMP')
def test_tiff(self): def test_tiff(self):
p = images.JPGParser('./tests/data/dirty.tiff') p = images.TiffParser('./tests/data/dirty.tiff')
meta = p.get_meta() meta = p.get_meta()
self.assertEqual(meta['Make'], 'OLYMPUS IMAGING CORP.') self.assertEqual(meta['Make'], 'OLYMPUS IMAGING CORP.')
self.assertEqual(meta['Model'], 'C7070WZ') self.assertEqual(meta['Model'], 'C7070WZ')