1
0
Fork 0
mirror of synced 2025-07-03 03:47:20 +02:00

Add support for docx

This commit is contained in:
jvoisin 2018-03-31 15:47:06 +02:00
parent 302a5ea002
commit 865ad181ae
3 changed files with 72 additions and 0 deletions

BIN
tests/data/dirty.docx Normal file

Binary file not shown.

View file

@ -39,6 +39,11 @@ class TestGetMeta(unittest.TestCase):
meta = p.get_meta()
self.assertEqual(meta['TITLE'], ['I am so'])
def test_docx(self):
    """Check that metadata can be read from the dirty docx fixture."""
    p = office.OfficeParser('./tests/data/dirty.docx')
    meta = p.get_meta()
    # The test used to only print `meta`, so it could never fail.
    # get_meta() is compared against `{}` in the cleaning test, so a dict
    # is the expected return type.
    self.assertIsInstance(meta, dict)
    # NOTE(review): assumes the "dirty" fixture actually carries metadata;
    # tighten this to specific keys/values once they are pinned down.
    self.assertNotEqual(meta, {})
class TestCleaning(unittest.TestCase):
def test_pdf(self):
@ -131,3 +136,18 @@ class TestCleaning(unittest.TestCase):
self.assertEqual(p.get_meta(), {})
os.remove('./tests/data/clean.flac')
def test_office(self):
    """Clean a copy of the dirty docx fixture and check no metadata remains.

    The fixture is copied first so the original is never modified. Both the
    working copy and the `.cleaned` output are removed afterwards, whether
    the assertions pass or fail, so no stray files stay in tests/data.
    """
    working_copy = './tests/data/clean.docx'
    cleaned_copy = './tests/data/clean.docx.cleaned'

    shutil.copy('./tests/data/dirty.docx', working_copy)
    try:
        p = office.OfficeParser(working_copy)

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        ret = p.remove_all()
        self.assertTrue(ret)

        # The cleaned output is read from the original path + '.cleaned'.
        p = office.OfficeParser(cleaned_copy)
        self.assertEqual(p.get_meta(), {})
    finally:
        # The cleaned file may not exist if remove_all() failed, so only
        # delete what is actually there.
        for path in (working_copy, cleaned_copy):
            if os.path.exists(path):
                os.remove(path)