1
0
Fork 0
mirror of synced 2025-07-03 11:57:26 +02:00

Remove docx revisions

This commit is contained in:
jvoisin 2018-07-01 23:11:10 +02:00
parent 02f7605ac1
commit bee56a57ce
3 changed files with 85 additions and 15 deletions

BIN
tests/data/revision.docx Normal file

Binary file not shown.

View file

@ -121,6 +121,7 @@ class TestRemovingThumbnails(unittest.TestCase):
zipin.close()
os.remove('./tests/data/clean.cleaned.odt')
os.remove('./tests/data/clean.odt')
class TestRevisionsCleaning(unittest.TestCase):
@ -142,6 +143,26 @@ class TestRevisionsCleaning(unittest.TestCase):
os.remove('./tests/data/clean.odt')
os.remove('./tests/data/clean.cleaned.odt')
def test_msoffice(self):
    """Check that cleaning a .docx removes its tracked-revision markup.

    The fixture ``revision.docx`` carries a ``<w:ins>`` (tracked insertion)
    element in ``word/document.xml``. The test works on a copy of the
    fixture, runs ``MSOfficeParser.remove_all()`` on it, and then expects
    the element to be absent from ``revision_clean.cleaned.docx``.
    """
    # Single definition of the marker so the "before" and "after" checks
    # can never drift apart.
    revision_marker = (b'<w:ins w:id="1" w:author="Unknown Author" '
                       b'w:date="2018-06-28T23:48:00Z">')

    # Sanity check: the fixture really contains the tracked insertion.
    with zipfile.ZipFile('./tests/data/revision.docx') as zipin:
        # ZipFile.read() opens *and closes* the member, unlike a bare
        # zipin.open(...) whose handle was previously left unclosed.
        content = zipin.read('word/document.xml')
    self.assertIn(revision_marker, content)

    # Work on a copy so the fixture itself is never modified.
    shutil.copy('./tests/data/revision.docx', './tests/data/revision_clean.docx')

    p = office.MSOfficeParser('./tests/data/revision_clean.docx')
    self.assertTrue(p.remove_all())

    # The cleaned archive must no longer contain the revision element.
    with zipfile.ZipFile('./tests/data/revision_clean.cleaned.docx') as zipin:
        content = zipin.read('word/document.xml')
    self.assertNotIn(revision_marker, content)

    os.remove('./tests/data/revision_clean.docx')
    os.remove('./tests/data/revision_clean.cleaned.docx')
class TestDeepCleaning(unittest.TestCase):
def __check_deep_meta(self, p):