From d00ca800b2d4ba7f72db7c77cefe8da43e42f735 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sun, 14 Mar 2021 14:41:40 +0100 Subject: [PATCH] Keep sharedStrings.xml when processing MSOffice sheets --- libmat2/office.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libmat2/office.py b/libmat2/office.py index 7e8c60c..4cd9da2 100644 --- a/libmat2/office.py +++ b/libmat2/office.py @@ -87,6 +87,7 @@ class MSOfficeParser(ZipParser): self.files_to_keep = set(map(re.compile, { # type: ignore r'^\[Content_Types\]\.xml$', r'^_rels/\.rels$', + r'^xl/sharedStrings\.xml$', # https://docs.microsoft.com/en-us/office/open-xml/working-with-the-shared-string-table r'^(?:word|ppt|xl)/_rels/document\.xml\.rels$', r'^(?:word|ppt|xl)/_rels/footer[0-9]*\.xml\.rels$', r'^(?:word|ppt|xl)/_rels/header[0-9]*\.xml\.rels$',