From 072ee1814d2d40788a93622fe6e753a9f434d515 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Wed, 5 Sep 2018 18:41:08 +0200 Subject: [PATCH] Remove defusedxml support and document why --- doc/implementation_notes.md | 8 ++++++++ libmat2/office.py | 6 +----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/doc/implementation_notes.md b/doc/implementation_notes.md index b763835..3b8e49d 100644 --- a/doc/implementation_notes.md +++ b/doc/implementation_notes.md @@ -61,3 +61,11 @@ Images handling When possible, images are handled like PDF: rendered on a surface, then saved to the filesystem. This ensures that every metadata is removed. +XML attacks +----------- + +Since our threat model conveniently excludes files crafted to specifically +bypass MAT2, fileformats containing harmful XML are out of our scope. +But since MAT2 is using [etree](https://docs.python.org/3/library/xml.html#xml-vulnerabilities) +to process XML, it's "only" vulnerable to DoS, and not memory corruption: +odds are that the user will notice that the cleaning didn't succeed. diff --git a/libmat2/office.py b/libmat2/office.py index 224067c..29100df 100644 --- a/libmat2/office.py +++ b/libmat2/office.py @@ -7,11 +7,7 @@ import zipfile import logging from typing import Dict, Set, Pattern -try: # protect against DoS - from defusedxml import ElementTree as ET # type: ignore -except ImportError: - import xml.etree.ElementTree as ET # type: ignore - +import xml.etree.ElementTree as ET # type: ignore from . import abstract, parser_factory