1
0
mirror of synced 2024-11-22 09:14:23 +01:00

Slightly improve get_meta for LibreOffice files

This commit is contained in:
jvoisin 2019-02-08 23:23:56 +01:00
parent 6cc034e81b
commit 6ef6aaa222

View File

@ -384,7 +384,7 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
return {}
with open(full_path, encoding='utf-8') as f:
try:
results = re.findall(r"<((?:meta|dc|cp).+?)>(.+)</\1>", f.read(), re.I|re.M)
results = re.findall(r"<((?:meta|dc|cp).+?)[^>]*>(.+)</\1>", f.read(), re.I|re.M)
return {k:v for (k, v) in results}
except (TypeError, UnicodeDecodeError): # We didn't manage to parse the xml file
# We didn't manage to parse the xml file