diff --git a/doc/implementation_notes.md b/doc/implementation_notes.md index b763835..3b8e49d 100644 --- a/doc/implementation_notes.md +++ b/doc/implementation_notes.md @@ -61,3 +61,11 @@ Images handling When possible, images are handled like PDF: rendered on a surface, then saved to the filesystem. This ensures that every metadata is removed. +XML attacks +----------- + +Since our threat model conveniently excludes files crafted to specifically +bypass MAT2, file formats containing harmful XML are out of our scope. +But since MAT2 is using [etree](https://docs.python.org/3/library/xml.html#xml-vulnerabilities) +to process XML, it's "only" vulnerable to DoS, and not memory corruption: +odds are that the user will notice that the cleaning didn't succeed. diff --git a/libmat2/office.py b/libmat2/office.py index 224067c..29100df 100644 --- a/libmat2/office.py +++ b/libmat2/office.py @@ -7,11 +7,7 @@ import zipfile import logging from typing import Dict, Set, Pattern -try: # protect against DoS - from defusedxml import ElementTree as ET # type: ignore -except ImportError: - import xml.etree.ElementTree as ET # type: ignore - +import xml.etree.ElementTree as ET # type: ignore from . import abstract, parser_factory