From 5a5c642a463523bf8cc56ad13817b82900661bd4 Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Wed, 3 Oct 2018 16:35:36 +0200
Subject: [PATCH] Don't break office files for MS Office

We didn't take the whitelist into account while removing dangling files
from [Content_types].xml
---
 libmat2/office.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/libmat2/office.py b/libmat2/office.py
index 997a247..372d69a 100644
--- a/libmat2/office.py
+++ b/libmat2/office.py
@@ -217,8 +217,13 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
         removed_fnames = set()
         with zipfile.ZipFile(self.filename) as zin:
             for fname in [item.filename for item in zin.infolist()]:
-                if any(map(lambda r: r.search(fname), self.files_to_omit)):  # type: ignore
-                    removed_fnames.add(fname)
+                for file_to_omit in self.files_to_omit:
+                    if file_to_omit.search(fname):
+                        matches = map(lambda r: r.search(fname), self.files_to_keep)
+                        if any(matches):  # the file is whitelisted
+                            continue
+                        removed_fnames.add(fname)
+                        break
 
         root = tree.getroot()
         for item in root.findall('{%s}Override' % namespace['']):