diff --git a/libmat2/office.py b/libmat2/office.py
index e813fae..34ae7a2 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -146,6 +146,7 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
     files_to_omit = set(map(re.compile, {  # type: ignore
         '^meta\.xml$',
         '^Configurations2/',
+        '^Thumbnails/',
     }))
 
     def get_meta(self) -> Dict[str, str]:
diff --git a/tests/data/revision.odt b/tests/data/revision.odt
new file mode 100644
index 0000000..d3b209b
Binary files /dev/null and b/tests/data/revision.odt differ
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index b34e7a4..3ea044f 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -105,6 +105,26 @@ class TestGetMeta(unittest.TestCase):
         self.assertEqual(meta['meta:generator'], 'LibreOffice/3.3$Unix LibreOffice_project/330m19$Build-202')
 
 
+class TestRemovingThumbnails(unittest.TestCase):
+    """Check that cleaning an ODT archive drops its embedded thumbnail."""
+
+    def test_odt(self):
+        # Work on a copy so the reference fixture is never modified.
+        shutil.copy('./tests/data/revision.odt', './tests/data/clean.odt')
+        # Remove the working copy even when an assertion below fails.
+        self.addCleanup(os.remove, './tests/data/clean.odt')
+
+        with zipfile.ZipFile(os.path.abspath('./tests/data/clean.odt')) as zipin:
+            self.assertIn('Thumbnails/thumbnail.png', zipin.namelist())
+
+        p = office.LibreOfficeParser('./tests/data/clean.odt')
+        self.assertTrue(p.remove_all())
+        # Registered only after remove_all() reported success, i.e. once the
+        # cleaned output file is expected to exist.
+        self.addCleanup(os.remove, './tests/data/clean.cleaned.odt')
+
+        with zipfile.ZipFile(os.path.abspath('./tests/data/clean.cleaned.odt')) as zipin:
+            self.assertNotIn('Thumbnails/thumbnail.png', zipin.namelist())
+
 class TestDeepCleaning(unittest.TestCase):
     def __check_deep_meta(self, p):
         tempdir = tempfile.mkdtemp()