1
0
Fork 0
mirror of synced 2025-07-04 04:17:29 +02:00

Make utf-8 explicit in all tree.write calls

This commit is contained in:
Alex Marchant 2024-04-03 15:27:48 -04:00
parent 1b9ce34e2c
commit f931a0ecee
2 changed files with 34 additions and 8 deletions

View file

@ -873,5 +873,31 @@ class TextDocx(unittest.TestCase):
# Check if 'word/comments.xml' exists in the zip
self.assertNotIn('word/comments.xml', zipin.namelist())
os.remove('./tests/data/comment_clean.docx')
os.remove('./tests/data/comment_clean.cleaned.docx')
def test_xml_is_utf8(self):
    """Check that word/document.xml still declares UTF-8 after cleaning.

    The fixture './tests/data/comment.docx' must declare a UTF-8 encoding
    in 'word/document.xml'. A copy of it is run through
    ``office.MSOfficeParser.remove_all()``, and the resulting
    './tests/data/comment_clean.cleaned.docx' must carry the same
    declaration.
    """
    source = './tests/data/comment.docx'
    working = './tests/data/comment_clean.docx'
    cleaned = './tests/data/comment_clean.cleaned.docx'

    def assert_declares_utf8(path):
        # ZipFile.read returns the member's bytes without leaving an open
        # member handle behind (unlike a bare zipin.open(...)).
        with zipfile.ZipFile(path) as zipin:
            content = zipin.read('word/document.xml')
        # Accept either quote style and any letter case for the encoding.
        r = b'encoding=(\'|\")UTF-8(\'|\")'
        match = re.search(r, content, re.IGNORECASE)
        self.assertIsNotNone(match)

    # ensure encoding is utf-8
    assert_declares_utf8(source)

    try:
        shutil.copy(source, working)
        p = office.MSOfficeParser(working)
        self.assertTrue(p.remove_all())

        # ensure encoding is still utf-8
        assert_declares_utf8(cleaned)
    finally:
        # Always remove the temporary files, even when an assertion above
        # fails, so a failing run does not pollute tests/data. A file may
        # be missing if the failure happened before it was created.
        for leftover in (working, cleaned):
            if os.path.exists(leftover):
                os.remove(leftover)