From f67cd9d7dcf465bb83597cf9dd64fb8b6bc053db Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Sun, 15 Dec 2019 06:44:21 -0800
Subject: [PATCH] Improve the robustness of the CSS parser

---
 libmat2/web.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/libmat2/web.py b/libmat2/web.py
index b770200..2864d60 100644
--- a/libmat2/web.py
+++ b/libmat2/web.py
@@ -17,7 +17,11 @@ class CSSParser(abstract.AbstractParser):
 
     def remove_all(self) -> bool:
         with open(self.filename, encoding='utf-8') as f:
-            cleaned = re.sub(r'/\*.*?\*/', '', f.read(), 0, self.flags)
+            try:
+                content = f.read()
+            except UnicodeDecodeError:  # pragma: no cover
+                raise ValueError
+            cleaned = re.sub(r'/\*.*?\*/', '', content, 0, self.flags)
         with open(self.output_filename, 'w', encoding='utf-8') as f:
             f.write(cleaned)
         return True
@@ -25,7 +29,11 @@ class CSSParser(abstract.AbstractParser):
     def get_meta(self) -> Dict[str, Any]:
         metadata = {}
         with open(self.filename, encoding='utf-8') as f:
-            cssdoc = re.findall(r'/\*(.*?)\*/', f.read(), self.flags)
+            try:
+                content = f.read()
+            except UnicodeDecodeError:  # pragma: no cover
+                raise ValueError
+            cssdoc = re.findall(r'/\*(.*?)\*/', content, self.flags)
         for match in cssdoc:
             for line in match.splitlines():
                 try: