diff --git a/libmat2/office.py b/libmat2/office.py
index 90f7c7a..914fd39 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -40,7 +40,11 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
 
 
     def _clean_internal_file(self, item: zipfile.ZipInfo, temp_folder: str,
-                             zin: zipfile.ZipFile, zout: zipfile.ZipFile):
+                             zin: zipfile.ZipFile, zout: zipfile.ZipFile) -> bool:
+        """Extract `item`, clean it unless whitelisted, and write it to `zout`.
+
+        Return False if the member's format is unsupported, True otherwise.
+        """
         output = ''
         zin.extract(member=item, path=temp_folder)
         if item.filename not in self.whitelist:
@@ -48,7 +52,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
             tmp_parser, mtype = parser_factory.get_parser(full_path)  # type: ignore
             if not tmp_parser:
                 print("%s's format (%s) isn't supported" % (item.filename, mtype))
-                return
+                return False
             tmp_parser.remove_all()
             output = tmp_parser.output_filename
         else:
@@ -57,6 +61,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
         clean_zinfo = self._clean_zipinfo(zinfo)
         with open(output, 'rb') as f:
             zout.writestr(clean_zinfo, f.read())
+        return True
 
 
 class MSOfficeParser(ArchiveBasedAbstractParser):
@@ -104,7 +109,12 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
                 zout.writestr(item, zin.read(item))
                 continue
 
-            self._clean_internal_file(item, temp_folder, zin, zout)
+            if not self._clean_internal_file(item, temp_folder, zin, zout):
+                # Abort: drop the half-written output and the temp folder.
+                zout.close()
+                os.remove(self.output_filename)
+                shutil.rmtree(temp_folder)
+                return False
 
         shutil.rmtree(temp_folder)
         zout.close()
@@ -156,7 +166,12 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
             elif item.filename == 'meta.xml':
                 continue  # don't keep metadata files
 
-            self._clean_internal_file(item, temp_folder, zin, zout)
+            if not self._clean_internal_file(item, temp_folder, zin, zout):
+                # Abort: drop the half-written output and the temp folder.
+                zout.close()
+                os.remove(self.output_filename)
+                shutil.rmtree(temp_folder)
+                return False
 
         shutil.rmtree(temp_folder)
         zout.close()
diff --git a/tests/data/embedded.docx b/tests/data/embedded.docx
new file mode 100644
index 0000000..b134724
Binary files /dev/null and b/tests/data/embedded.docx differ
diff --git a/tests/data/embedded.odt b/tests/data/embedded.odt
new file mode 100644
index 0000000..62bf8cc
Binary files /dev/null and b/tests/data/embedded.odt differ
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 7deeadc..c85f425 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -27,6 +27,20 @@ class TestParameterInjection(unittest.TestCase):
         os.remove('-ver')
 
 
+class TestUnsupportedEmbeddedFiles(unittest.TestCase):
+    def test_odt_with_svg(self):
+        shutil.copy('./tests/data/embedded.odt', './tests/data/clean.odt')
+        p = office.LibreOfficeParser('./tests/data/clean.odt')
+        self.assertFalse(p.remove_all())
+        os.remove('./tests/data/clean.odt')
+
+    def test_docx_with_svg(self):
+        shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx')
+        p = office.MSOfficeParser('./tests/data/clean.docx')
+        self.assertFalse(p.remove_all())
+        os.remove('./tests/data/clean.docx')
+
+
 class TestUnsupportedFiles(unittest.TestCase):
     def test_pdf(self):
         shutil.copy('./tests/test_libmat2.py', './tests/clean.py')