1
0
mirror of synced 2024-12-22 12:50:13 +01:00

Don't break office files for MS Office

We didn't take the whitelist into account while
removing dangling files from [Content_Types].xml.
This commit is contained in:
jvoisin 2018-10-03 16:35:36 +02:00
parent 84e302ac93
commit 5a5c642a46

View File

@ -217,8 +217,13 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
removed_fnames = set()
with zipfile.ZipFile(self.filename) as zin:
for fname in [item.filename for item in zin.infolist()]:
if any(map(lambda r: r.search(fname), self.files_to_omit)): # type: ignore
removed_fnames.add(fname)
for file_to_omit in self.files_to_omit:
if file_to_omit.search(fname):
matches = map(lambda r: r.search(fname), self.files_to_keep)
if any(matches): # the file is whitelisted
continue
removed_fnames.add(fname)
break
root = tree.getroot()
for item in root.findall('{%s}Override' % namespace['']):