Slightly improve get_meta for LibreOffice files
This commit is contained in:
parent
6cc034e81b
commit
6ef6aaa222
@ -384,7 +384,7 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
|
|||||||
return {}
|
return {}
|
||||||
with open(full_path, encoding='utf-8') as f:
|
with open(full_path, encoding='utf-8') as f:
|
||||||
try:
|
try:
|
||||||
results = re.findall(r"<((?:meta|dc|cp).+?)>(.+)</\1>", f.read(), re.I|re.M)
|
results = re.findall(r"<((?:meta|dc|cp).+?)[^>]*>(.+)</\1>", f.read(), re.I|re.M)
|
||||||
return {k:v for (k, v) in results}
|
return {k:v for (k, v) in results}
|
||||||
except (TypeError, UnicodeDecodeError): # We didn't manage to parse the xml file
|
except (TypeError, UnicodeDecodeError): # We didn't manage to parse the xml file
|
||||||
# We didn't manage to parse the xml file
|
# We didn't manage to parse the xml file
|
||||||
|
Loading…
Reference in New Issue
Block a user