Make utf-8 explicit in all tree.write calls
This commit is contained in:
parent
1b9ce34e2c
commit
f931a0ecee
@ -38,7 +38,7 @@ def _sort_xml_attributes(full_path: str) -> bool:
|
||||
for c in tree.getroot():
|
||||
c[:] = sorted(c, key=lambda child: (child.tag, child.get('desc')))
|
||||
|
||||
tree.write(full_path, xml_declaration=True)
|
||||
tree.write(full_path, xml_declaration=True, encoding='utf-8')
|
||||
return True
|
||||
|
||||
|
||||
@ -220,7 +220,7 @@ class MSOfficeParser(ZipParser):
|
||||
for element in elements_to_remove:
|
||||
parent_map[element].remove(element)
|
||||
|
||||
tree.write(full_path, xml_declaration=True)
|
||||
tree.write(full_path, xml_declaration=True, encoding='utf-8')
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
@ -250,7 +250,7 @@ class MSOfficeParser(ZipParser):
|
||||
for element in elements_to_remove:
|
||||
parent_map[element].remove(element)
|
||||
|
||||
tree.write(full_path, xml_declaration=True)
|
||||
tree.write(full_path, xml_declaration=True, encoding='utf-8')
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
@ -287,7 +287,7 @@ class MSOfficeParser(ZipParser):
|
||||
parent_map[element].insert(position, children)
|
||||
parent_map[element].remove(element)
|
||||
|
||||
tree.write(full_path, xml_declaration=True)
|
||||
tree.write(full_path, xml_declaration=True, encoding='utf-8')
|
||||
return True
|
||||
|
||||
def __remove_content_type_members(self, full_path: str) -> bool:
|
||||
@ -320,7 +320,7 @@ class MSOfficeParser(ZipParser):
|
||||
if name in removed_fnames:
|
||||
root.remove(item)
|
||||
|
||||
tree.write(full_path, xml_declaration=True)
|
||||
tree.write(full_path, xml_declaration=True, encoding='utf-8')
|
||||
return True
|
||||
|
||||
def _final_checks(self) -> bool:
|
||||
@ -355,7 +355,7 @@ class MSOfficeParser(ZipParser):
|
||||
|
||||
for item in tree.iterfind('.//p14:creationId', namespace):
|
||||
item.set('val', '%s' % random.randint(0, 2**32))
|
||||
tree.write(full_path, xml_declaration=True)
|
||||
tree.write(full_path, xml_declaration=True, encoding='utf-8')
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
@ -371,7 +371,7 @@ class MSOfficeParser(ZipParser):
|
||||
|
||||
for item in tree.iterfind('.//p:sldMasterId', namespace):
|
||||
item.set('id', '%s' % random.randint(0, 2**32))
|
||||
tree.write(full_path, xml_declaration=True)
|
||||
tree.write(full_path, xml_declaration=True, encoding='utf-8')
|
||||
return True
|
||||
|
||||
def _specific_cleanup(self, full_path: str) -> bool:
|
||||
@ -514,7 +514,7 @@ class LibreOfficeParser(ZipParser):
|
||||
for changes in text.iterfind('.//text:tracked-changes', namespace):
|
||||
text.remove(changes)
|
||||
|
||||
tree.write(full_path, xml_declaration=True)
|
||||
tree.write(full_path, xml_declaration=True, encoding='utf-8')
|
||||
return True
|
||||
|
||||
def _specific_cleanup(self, full_path: str) -> bool:
|
||||
|
@ -873,5 +873,31 @@ class TextDocx(unittest.TestCase):
|
||||
# Check that 'word/comments.xml' no longer exists in the zip
|
||||
self.assertNotIn('word/comments.xml', zipin.namelist())
|
||||
|
||||
os.remove('./tests/data/comment_clean.docx')
|
||||
os.remove('./tests/data/comment_clean.cleaned.docx')
|
||||
|
||||
def test_xml_is_utf8(self):
|
||||
with zipfile.ZipFile('./tests/data/comment.docx') as zipin:
|
||||
c = zipin.open('word/document.xml')
|
||||
content = c.read()
|
||||
|
||||
# ensure encoding is utf-8
|
||||
r = b'encoding=(\'|\")UTF-8(\'|\")'
|
||||
match = re.search(r, content, re.IGNORECASE)
|
||||
self.assertIsNotNone(match)
|
||||
|
||||
shutil.copy('./tests/data/comment.docx', './tests/data/comment_clean.docx')
|
||||
p = office.MSOfficeParser('./tests/data/comment_clean.docx')
|
||||
self.assertTrue(p.remove_all())
|
||||
|
||||
with zipfile.ZipFile('./tests/data/comment_clean.cleaned.docx') as zipin:
|
||||
c = zipin.open('word/document.xml')
|
||||
content = c.read()
|
||||
|
||||
# ensure encoding is still utf-8
|
||||
r = b'encoding=(\'|\")UTF-8(\'|\")'
|
||||
match = re.search(r, content, re.IGNORECASE)
|
||||
self.assertIsNotNone(match)
|
||||
|
||||
os.remove('./tests/data/comment_clean.docx')
|
||||
os.remove('./tests/data/comment_clean.cleaned.docx')
|
Loading…
Reference in New Issue
Block a user