1
0
Fork 0
mirror of synced 2025-07-04 20:37:34 +02:00

Use [Content_Types].xml to improve MS Office coverage

This commit is contained in:
jvoisin 2018-10-01 22:26:35 +02:00
parent 5b606f939d
commit c67bbafb2c
6 changed files with 91 additions and 29 deletions

Binary file not shown.

Binary file not shown.

View file

@ -86,14 +86,26 @@ class TestExplicitelyUnsupportedFiles(unittest.TestCase):
os.remove('./tests/data/clean.py')
class TestCorruptedContentTypesOffice(unittest.TestCase):
def test_office(self):
class TestWrongContentTypesFileOffice(unittest.TestCase):
def test_office_incomplete(self):
shutil.copy('./tests/data/malformed_content_types.docx', './tests/data/clean.docx')
p = office.MSOfficeParser('./tests/data/clean.docx')
self.assertIsNotNone(p)
self.assertFalse(p.remove_all())
os.remove('./tests/data/clean.docx')
def test_office_broken(self):
shutil.copy('./tests/data/broken_xml_content_types.docx', './tests/data/clean.docx')
with self.assertRaises(ValueError):
office.MSOfficeParser('./tests/data/clean.docx')
os.remove('./tests/data/clean.docx')
def test_office_absent(self):
shutil.copy('./tests/data/no_content_types.docx', './tests/data/clean.docx')
with self.assertRaises(ValueError):
office.MSOfficeParser('./tests/data/clean.docx')
os.remove('./tests/data/clean.docx')
class TestCorruptedFiles(unittest.TestCase):
def test_pdf(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')