Add some tests for unsupported embedded file formats
This commit is contained in:
parent
b310a18e69
commit
8c7979aae3
@ -40,7 +40,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
|
|
||||||
|
|
||||||
def _clean_internal_file(self, item: zipfile.ZipInfo, temp_folder: str,
|
def _clean_internal_file(self, item: zipfile.ZipInfo, temp_folder: str,
|
||||||
zin: zipfile.ZipFile, zout: zipfile.ZipFile):
|
zin: zipfile.ZipFile, zout: zipfile.ZipFile) -> bool:
|
||||||
output = ''
|
output = ''
|
||||||
zin.extract(member=item, path=temp_folder)
|
zin.extract(member=item, path=temp_folder)
|
||||||
if item.filename not in self.whitelist:
|
if item.filename not in self.whitelist:
|
||||||
@ -48,7 +48,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore
|
tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore
|
||||||
if not tmp_parser:
|
if not tmp_parser:
|
||||||
print("%s's format (%s) isn't supported" % (item.filename, mtype))
|
print("%s's format (%s) isn't supported" % (item.filename, mtype))
|
||||||
return
|
return False
|
||||||
tmp_parser.remove_all()
|
tmp_parser.remove_all()
|
||||||
output = tmp_parser.output_filename
|
output = tmp_parser.output_filename
|
||||||
else:
|
else:
|
||||||
@ -57,6 +57,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
clean_zinfo = self._clean_zipinfo(zinfo)
|
clean_zinfo = self._clean_zipinfo(zinfo)
|
||||||
with open(output, 'rb') as f:
|
with open(output, 'rb') as f:
|
||||||
zout.writestr(clean_zinfo, f.read())
|
zout.writestr(clean_zinfo, f.read())
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
class MSOfficeParser(ArchiveBasedAbstractParser):
|
class MSOfficeParser(ArchiveBasedAbstractParser):
|
||||||
@ -104,7 +105,10 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
|
|||||||
zout.writestr(item, zin.read(item))
|
zout.writestr(item, zin.read(item))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
self._clean_internal_file(item, temp_folder, zin, zout)
|
if self._clean_internal_file(item, temp_folder, zin, zout) is False:
|
||||||
|
zout.close()
|
||||||
|
os.remove(self.output_filename)
|
||||||
|
return False
|
||||||
|
|
||||||
shutil.rmtree(temp_folder)
|
shutil.rmtree(temp_folder)
|
||||||
zout.close()
|
zout.close()
|
||||||
@ -156,7 +160,9 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
|
|||||||
elif item.filename == 'meta.xml':
|
elif item.filename == 'meta.xml':
|
||||||
continue # don't keep metadata files
|
continue # don't keep metadata files
|
||||||
|
|
||||||
self._clean_internal_file(item, temp_folder, zin, zout)
|
if self._clean_internal_file(item, temp_folder, zin, zout) is False:
|
||||||
|
os.remove(self.output_filename)
|
||||||
|
return False
|
||||||
|
|
||||||
shutil.rmtree(temp_folder)
|
shutil.rmtree(temp_folder)
|
||||||
zout.close()
|
zout.close()
|
||||||
|
BIN
tests/data/embedded.docx
Normal file
BIN
tests/data/embedded.docx
Normal file
Binary file not shown.
BIN
tests/data/embedded.odt
Normal file
BIN
tests/data/embedded.odt
Normal file
Binary file not shown.
@ -27,6 +27,19 @@ class TestParameterInjection(unittest.TestCase):
|
|||||||
os.remove('-ver')
|
os.remove('-ver')
|
||||||
|
|
||||||
|
|
||||||
|
class TestUnsupportedEmbeddedFiles(unittest.TestCase):
    """Check that office parsers report failure on the ``embedded.*`` fixtures.

    Each test copies a fixture from ``tests/data`` to a working file, runs
    ``remove_all()`` on it and expects a falsy result. Going by the test
    names, the fixtures presumably embed an SVG that the parsers cannot
    clean -- TODO confirm against the binary fixtures.
    """

    def test_odt_with_svg(self):
        """``LibreOfficeParser.remove_all`` must be falsy for ``embedded.odt``."""
        shutil.copy('./tests/data/embedded.odt', './tests/data/clean.odt')
        # Register the removal right after the copy so the working file is
        # deleted even when the assertion below fails.
        self.addCleanup(os.remove, './tests/data/clean.odt')

        p = office.LibreOfficeParser('./tests/data/clean.odt')
        self.assertFalse(p.remove_all())

    def test_docx_with_svg(self):
        """``MSOfficeParser.remove_all`` must be falsy for ``embedded.docx``."""
        shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx')
        # Same reasoning as above: never leave the working copy behind.
        self.addCleanup(os.remove, './tests/data/clean.docx')

        p = office.MSOfficeParser('./tests/data/clean.docx')
        self.assertFalse(p.remove_all())
|
||||||
|
|
||||||
class TestUnsupportedFiles(unittest.TestCase):
|
class TestUnsupportedFiles(unittest.TestCase):
|
||||||
def test_pdf(self):
|
def test_pdf(self):
|
||||||
shutil.copy('./tests/test_libmat2.py', './tests/clean.py')
|
shutil.copy('./tests/test_libmat2.py', './tests/clean.py')
|
||||||
|
Loading…
Reference in New Issue
Block a user