1
0
mirror of synced 2024-12-22 04:39:58 +01:00

Remove the thumbnails from LibreOffice files

This commit is contained in:
jvoisin 2018-07-01 17:29:05 +02:00
parent 177184ac67
commit 80fc4ffb40
3 changed files with 18 additions and 0 deletions

View File

@@ -146,6 +146,7 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
# Compiled patterns for archive member names to omit; presumably consumed by
# the archive-based parent parser, which is not visible here -- confirm.
# Raw strings keep `\.` a regex escape rather than an invalid string escape.
files_to_omit = set(map(re.compile, {  # type: ignore
    r'^meta\.xml$',
    r'^Configurations2/',
    r'^Thumbnails/',
}))
def get_meta(self) -> Dict[str, str]:

BIN
tests/data/revision.odt Normal file

Binary file not shown.

View File

@@ -105,6 +105,23 @@ class TestGetMeta(unittest.TestCase):
self.assertEqual(meta['meta:generator'], 'LibreOffice/3.3$Unix LibreOffice_project/330m19$Build-202')
class TestRemovingThumbnails(unittest.TestCase):
    """Check that cleaning a LibreOffice document drops its thumbnail."""

    @staticmethod
    def _remove_if_present(path):
        """Delete *path*, tolerating the case where it was never created."""
        try:
            os.remove(path)
        except FileNotFoundError:
            # Expected when the test failed before the file was produced.
            pass

    def test_odt(self):
        """`Thumbnails/thumbnail.png` must be absent from the cleaned file."""
        working_copy = './tests/data/clean.odt'
        cleaned_copy = './tests/data/clean.cleaned.odt'

        shutil.copy('./tests/data/revision.odt', working_copy)
        # Registered up front so both files are removed even when an
        # assertion below fails; the working copy was previously left behind.
        self.addCleanup(self._remove_if_present, working_copy)
        self.addCleanup(self._remove_if_present, cleaned_copy)

        # Sanity check: the fixture really ships a thumbnail to remove.
        with zipfile.ZipFile(os.path.abspath(working_copy)) as zipin:
            self.assertIn('Thumbnails/thumbnail.png', zipin.namelist())

        p = office.LibreOfficeParser(working_copy)
        self.assertTrue(p.remove_all())

        with zipfile.ZipFile(os.path.abspath(cleaned_copy)) as zipin:
            self.assertNotIn('Thumbnails/thumbnail.png', zipin.namelist())
class TestDeepCleaning(unittest.TestCase):
def __check_deep_meta(self, p):
tempdir = tempfile.mkdtemp()