1
0
mirror of synced 2024-11-22 09:14:23 +01:00

Improve the robustness of the CSS parser

This commit is contained in:
jvoisin 2019-12-15 06:44:21 -08:00
parent 615997be38
commit f67cd9d7dc

View File

@@ -17,7 +17,11 @@ class CSSParser(abstract.AbstractParser):
def remove_all(self) -> bool: def remove_all(self) -> bool:
with open(self.filename, encoding='utf-8') as f: with open(self.filename, encoding='utf-8') as f:
cleaned = re.sub(r'/\*.*?\*/', '', f.read(), 0, self.flags) try:
content = f.read()
except UnicodeDecodeError: # pragma: no cover
raise ValueError
cleaned = re.sub(r'/\*.*?\*/', '', content, 0, self.flags)
with open(self.output_filename, 'w', encoding='utf-8') as f: with open(self.output_filename, 'w', encoding='utf-8') as f:
f.write(cleaned) f.write(cleaned)
return True return True
@@ -25,7 +29,11 @@ class CSSParser(abstract.AbstractParser):
def get_meta(self) -> Dict[str, Any]: def get_meta(self) -> Dict[str, Any]:
metadata = {} metadata = {}
with open(self.filename, encoding='utf-8') as f: with open(self.filename, encoding='utf-8') as f:
cssdoc = re.findall(r'/\*(.*?)\*/', f.read(), self.flags) try:
content = f.read()
except UnicodeDecodeError: # pragma: no cover
raise ValueError
cssdoc = re.findall(r'/\*(.*?)\*/', content, self.flags)
for match in cssdoc: for match in cssdoc:
for line in match.splitlines(): for line in match.splitlines():
try: try: