Slightly improve get_meta for LibreOffice files
This commit is contained in:
parent
6cc034e81b
commit
6ef6aaa222
@ -384,7 +384,7 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
|
||||
return {}
|
||||
with open(full_path, encoding='utf-8') as f:
|
||||
try:
|
||||
results = re.findall(r"<((?:meta|dc|cp).+?)>(.+)</\1>", f.read(), re.I|re.M)
|
||||
results = re.findall(r"<((?:meta|dc|cp).+?)[^>]*>(.+)</\1>", f.read(), re.I|re.M)
|
||||
return {k:v for (k, v) in results}
|
||||
except (TypeError, UnicodeDecodeError): # We didn't manage to parse the xml file
|
||||
# We didn't manage to parse the xml file
|
||||
|
Loading…
Reference in New Issue
Block a user