Use [Content_Types].xml to improve MS Office coverage
This commit is contained in:
parent
5b606f939d
commit
c67bbafb2c
6 changed files with 91 additions and 29 deletions
BIN
tests/data/broken_xml_content_types.docx
Normal file
BIN
tests/data/broken_xml_content_types.docx
Normal file
Binary file not shown.
Binary file not shown.
BIN
tests/data/no_content_types.docx
Normal file
BIN
tests/data/no_content_types.docx
Normal file
Binary file not shown.
|
@ -86,14 +86,26 @@ class TestExplicitelyUnsupportedFiles(unittest.TestCase):
|
|||
os.remove('./tests/data/clean.py')
|
||||
|
||||
|
||||
class TestCorruptedContentTypesOffice(unittest.TestCase):
|
||||
def test_office(self):
|
||||
class TestWrongContentTypesFileOffice(unittest.TestCase):
    """Run MSOfficeParser on .docx fixtures named after content-types problems.

    Each test copies its fixture to a scratch path and registers the removal
    of that copy with ``addCleanup``, so the scratch file is deleted even
    when an assertion fails or the parser raises something unexpected.
    """

    def _stage(self, fixture):
        """Copy *fixture* to the scratch path, schedule its removal, return the path."""
        target = './tests/data/clean.docx'
        shutil.copy(fixture, target)
        # Registered right after the copy so a failing test cannot leave the
        # scratch file behind for the tests that run afterwards.
        self.addCleanup(os.remove, target)
        return target

    def test_office_incomplete(self):
        """The 'malformed' fixture yields a parser whose remove_all() is falsy."""
        p = office.MSOfficeParser(self._stage('./tests/data/malformed_content_types.docx'))
        self.assertIsNotNone(p)
        self.assertFalse(p.remove_all())

    def test_office_broken(self):
        """Constructing a parser on the 'broken_xml' fixture raises ValueError."""
        target = self._stage('./tests/data/broken_xml_content_types.docx')
        with self.assertRaises(ValueError):
            office.MSOfficeParser(target)

    def test_office_absent(self):
        """Constructing a parser on the 'no_content_types' fixture raises ValueError."""
        target = self._stage('./tests/data/no_content_types.docx')
        with self.assertRaises(ValueError):
            office.MSOfficeParser(target)
|
||||
|
||||
class TestCorruptedFiles(unittest.TestCase):
|
||||
def test_pdf(self):
|
||||
shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue