1
0
mirror of synced 2024-11-25 18:54:22 +01:00

Add some tests for non-supported embedded fileformats

This commit is contained in:
jvoisin 2018-06-10 20:19:35 +02:00
parent b310a18e69
commit 8c7979aae3
4 changed files with 23 additions and 4 deletions

View File

@ -40,7 +40,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
def _clean_internal_file(self, item: zipfile.ZipInfo, temp_folder: str, def _clean_internal_file(self, item: zipfile.ZipInfo, temp_folder: str,
zin: zipfile.ZipFile, zout: zipfile.ZipFile): zin: zipfile.ZipFile, zout: zipfile.ZipFile) -> bool:
output = '' output = ''
zin.extract(member=item, path=temp_folder) zin.extract(member=item, path=temp_folder)
if item.filename not in self.whitelist: if item.filename not in self.whitelist:
@ -48,7 +48,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore
if not tmp_parser: if not tmp_parser:
print("%s's format (%s) isn't supported" % (item.filename, mtype)) print("%s's format (%s) isn't supported" % (item.filename, mtype))
return return False
tmp_parser.remove_all() tmp_parser.remove_all()
output = tmp_parser.output_filename output = tmp_parser.output_filename
else: else:
@ -57,6 +57,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
clean_zinfo = self._clean_zipinfo(zinfo) clean_zinfo = self._clean_zipinfo(zinfo)
with open(output, 'rb') as f: with open(output, 'rb') as f:
zout.writestr(clean_zinfo, f.read()) zout.writestr(clean_zinfo, f.read())
return True
class MSOfficeParser(ArchiveBasedAbstractParser): class MSOfficeParser(ArchiveBasedAbstractParser):
@ -104,7 +105,10 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
zout.writestr(item, zin.read(item)) zout.writestr(item, zin.read(item))
continue continue
self._clean_internal_file(item, temp_folder, zin, zout) if self._clean_internal_file(item, temp_folder, zin, zout) is False:
zout.close()
os.remove(self.output_filename)
return False
shutil.rmtree(temp_folder) shutil.rmtree(temp_folder)
zout.close() zout.close()
@ -156,7 +160,9 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
elif item.filename == 'meta.xml': elif item.filename == 'meta.xml':
continue # don't keep metadata files continue # don't keep metadata files
self._clean_internal_file(item, temp_folder, zin, zout) if self._clean_internal_file(item, temp_folder, zin, zout) is False:
os.remove(self.output_filename)
return False
shutil.rmtree(temp_folder) shutil.rmtree(temp_folder)
zout.close() zout.close()

BIN
tests/data/embedded.docx Normal file

Binary file not shown.

BIN
tests/data/embedded.odt Normal file

Binary file not shown.

View File

@ -27,6 +27,19 @@ class TestParameterInjection(unittest.TestCase):
os.remove('-ver') os.remove('-ver')
class TestUnsupportedEmbeddedFiles(unittest.TestCase):
    """Check office documents that embed a file the parsers cannot handle.

    Each test copies a fixture to a scratch file, runs the parser's
    ``remove_all()`` on the copy, and expects a falsy result.
    """

    def test_odt_with_svg(self):
        """``remove_all()`` must be falsy for the ``embedded.odt`` fixture."""
        shutil.copy('./tests/data/embedded.odt', './tests/data/clean.odt')
        # Register the removal right after the copy so the scratch file is
        # deleted even when the assertion fails or the parser raises.
        self.addCleanup(os.remove, './tests/data/clean.odt')

        p = office.LibreOfficeParser('./tests/data/clean.odt')
        self.assertFalse(p.remove_all())

    def test_docx_with_svg(self):
        """``remove_all()`` must be falsy for the ``embedded.docx`` fixture."""
        shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx')
        # Same reasoning as above: never leave the scratch copy behind.
        self.addCleanup(os.remove, './tests/data/clean.docx')

        p = office.MSOfficeParser('./tests/data/clean.docx')
        self.assertFalse(p.remove_all())
class TestUnsupportedFiles(unittest.TestCase): class TestUnsupportedFiles(unittest.TestCase):
def test_pdf(self): def test_pdf(self):
shutil.copy('./tests/test_libmat2.py', './tests/clean.py') shutil.copy('./tests/test_libmat2.py', './tests/clean.py')