Test for faulty files, and document how MAT2 is behaving wrt. them
This commit is contained in:
parent
459e9b82f7
commit
b02d72887a
@ -9,6 +9,14 @@ that only cleans the superficial metadata of your file, but not
|
|||||||
the ones that might be in **embeded** resources. Like for example,
|
the ones that might be in **embeded** resources. Like for example,
|
||||||
images in a PDF or an office document.
|
images in a PDF or an office document.
|
||||||
|
|
||||||
|
Race conditions
|
||||||
|
---------------
|
||||||
|
|
||||||
|
MAT2 does its very best to avoid crashing at runtime. This is why it's checking
|
||||||
|
if the file is valid __at parser creation__. MAT2 doesn't take any measure to
|
||||||
|
ensure that the file is not changed between the time the parser is
|
||||||
|
instantiated, and the call to clean or show the metadata.
|
||||||
|
|
||||||
Symlink attacks
|
Symlink attacks
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
@ -20,6 +20,13 @@ class PNGParser(abstract.AbstractParser):
|
|||||||
'Compression', 'Filter', 'Interlace', 'BackgroundColor', 'ImageSize',
|
'Compression', 'Filter', 'Interlace', 'BackgroundColor', 'ImageSize',
|
||||||
'Megapixels', 'ImageHeight'}
|
'Megapixels', 'ImageHeight'}
|
||||||
|
|
||||||
|
def __init__(self, filename):
|
||||||
|
super().__init__(filename)
|
||||||
|
try: # better fail here than later
|
||||||
|
cairo.ImageSurface.create_from_png(self.filename)
|
||||||
|
except MemoryError:
|
||||||
|
raise ValueError
|
||||||
|
|
||||||
def get_meta(self):
|
def get_meta(self):
|
||||||
out = subprocess.check_output(['/usr/bin/exiftool', '-json', self.filename])
|
out = subprocess.check_output(['/usr/bin/exiftool', '-json', self.filename])
|
||||||
meta = json.loads(out.decode('utf-8'))[0]
|
meta = json.loads(out.decode('utf-8'))[0]
|
||||||
|
@ -30,5 +30,8 @@ def get_parser(filename: str) -> (T, str):
|
|||||||
|
|
||||||
for c in _get_parsers():
|
for c in _get_parsers():
|
||||||
if mtype in c.mimetypes:
|
if mtype in c.mimetypes:
|
||||||
return c(filename), mtype
|
try:
|
||||||
|
return c(filename), mtype
|
||||||
|
except ValueError:
|
||||||
|
return None, mtype
|
||||||
return None, mtype
|
return None, mtype
|
||||||
|
@ -11,7 +11,7 @@ import io
|
|||||||
import cairo
|
import cairo
|
||||||
import gi
|
import gi
|
||||||
gi.require_version('Poppler', '0.18')
|
gi.require_version('Poppler', '0.18')
|
||||||
from gi.repository import Poppler
|
from gi.repository import Poppler, GLib
|
||||||
|
|
||||||
from . import abstract
|
from . import abstract
|
||||||
|
|
||||||
@ -28,6 +28,10 @@ class PDFParser(abstract.AbstractParser):
|
|||||||
super().__init__(filename)
|
super().__init__(filename)
|
||||||
self.uri = 'file://' + os.path.abspath(self.filename)
|
self.uri = 'file://' + os.path.abspath(self.filename)
|
||||||
self.__scale = 2 # how much precision do we want for the render
|
self.__scale = 2 # how much precision do we want for the render
|
||||||
|
try: # Check now that the file is valid, to avoid surprises later
|
||||||
|
Poppler.Document.new_from_file(self.uri, None)
|
||||||
|
except GLib.GError: # Invalid PDF
|
||||||
|
raise ValueError
|
||||||
|
|
||||||
def remove_all_lightweight(self):
|
def remove_all_lightweight(self):
|
||||||
"""
|
"""
|
||||||
@ -116,8 +120,9 @@ class PDFParser(abstract.AbstractParser):
|
|||||||
def get_meta(self):
|
def get_meta(self):
|
||||||
""" Return a dict with all the meta of the file
|
""" Return a dict with all the meta of the file
|
||||||
"""
|
"""
|
||||||
document = Poppler.Document.new_from_file(self.uri, None)
|
|
||||||
metadata = {}
|
metadata = {}
|
||||||
|
document = Poppler.Document.new_from_file(self.uri, None)
|
||||||
|
|
||||||
for key in self.meta_list:
|
for key in self.meta_list:
|
||||||
if document.get_property(key):
|
if document.get_property(key):
|
||||||
metadata[key] = document.get_property(key)
|
metadata[key] = document.get_property(key)
|
||||||
|
@ -16,6 +16,18 @@ class TestParserFactory(unittest.TestCase):
|
|||||||
self.assertEqual(mimetype, 'audio/mpeg')
|
self.assertEqual(mimetype, 'audio/mpeg')
|
||||||
self.assertEqual(parser.__class__, audio.MP3Parser)
|
self.assertEqual(parser.__class__, audio.MP3Parser)
|
||||||
|
|
||||||
|
class TestCorruptedFiles(unittest.TestCase):
|
||||||
|
def test_pdf(self):
|
||||||
|
shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
pdf.PDFParser('./tests/data/clean.png')
|
||||||
|
os.remove('./tests/data/clean.png')
|
||||||
|
|
||||||
|
def test_png(self):
|
||||||
|
shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
images.PNGParser('./tests/data/clean.pdf')
|
||||||
|
os.remove('./tests/data/clean.pdf')
|
||||||
|
|
||||||
class TestGetMeta(unittest.TestCase):
|
class TestGetMeta(unittest.TestCase):
|
||||||
def test_pdf(self):
|
def test_pdf(self):
|
||||||
|
Loading…
Reference in New Issue
Block a user