1
0
mirror of synced 2024-11-25 10:44:23 +01:00

Unknown Members: make policy use an Enum

Closes #60

Note: this changeset also ensures that clean.cleaned.docx is cleaned
up after the pytest run is over.
This commit is contained in:
Daniel Kahn Gillmor 2018-09-05 18:49:35 -04:00
parent 2d9ba81a84
commit f3cef319b9
4 changed files with 25 additions and 23 deletions

View File

@ -2,6 +2,7 @@
import os
import collections
from enum import Enum
import importlib
from typing import Dict, Optional
@ -62,3 +63,8 @@ def check_dependencies() -> dict:
ret[value] = False # pragma: no cover
return ret
class UnknownMemberPolicy(Enum):
    """Policy for archive members that no parser can handle.

    Each member's value is its lowercase name, so a policy can be built
    from its string form, e.g. ``UnknownMemberPolicy('omit')``; any other
    string raises ``ValueError``.
    """
    # Default policy of archive-based parsers.
    ABORT = 'abort'
    # The unknown member is logged and left out of the cleaned archive.
    OMIT = 'omit'
    # The unknown member is logged and kept; this may leak metadata.
    KEEP = 'keep'

View File

@ -9,7 +9,7 @@ from typing import Dict, Set, Pattern
import xml.etree.ElementTree as ET # type: ignore
from . import abstract, parser_factory
from . import abstract, parser_factory, UnknownMemberPolicy
# Make pyflakes happy
assert Set
@ -37,8 +37,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
files_to_omit = set() # type: Set[Pattern]
# what should the parser do if it encounters an unknown file in
# the archive? valid policies are 'abort', 'omit', 'keep'
unknown_member_policy = 'abort' # type: str
# the archive?
unknown_member_policy = UnknownMemberPolicy.ABORT # type: UnknownMemberPolicy
def __init__(self, filename):
super().__init__(filename)
@ -81,10 +81,6 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
def remove_all(self) -> bool:
# pylint: disable=too-many-branches
if self.unknown_member_policy not in ['omit', 'keep', 'abort']:
logging.error("The policy %s is invalid.", self.unknown_member_policy)
raise ValueError
with zipfile.ZipFile(self.filename) as zin,\
zipfile.ZipFile(self.output_filename, 'w') as zout:
@ -113,11 +109,11 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# supported files that we want to clean then add
tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore
if not tmp_parser:
if self.unknown_member_policy == 'omit':
if self.unknown_member_policy == UnknownMemberPolicy.OMIT:
logging.warning("In file %s, omitting unknown element %s (format: %s)",
self.filename, item.filename, mtype)
continue
elif self.unknown_member_policy == 'keep':
elif self.unknown_member_policy == UnknownMemberPolicy.KEEP:
logging.warning("In file %s, keeping unknown element %s (format: %s)",
self.filename, item.filename, mtype)
else:

17
mat2
View File

@ -10,7 +10,8 @@ import multiprocessing
import logging
try:
from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS, check_dependencies
from libmat2 import (parser_factory, UNSUPPORTED_EXTENSIONS, check_dependencies,
UnknownMemberPolicy)
except ValueError as e:
print(e)
sys.exit(1)
@ -42,8 +43,8 @@ def create_arg_parser():
parser.add_argument('-V', '--verbose', action='store_true',
help='show more verbose status information')
parser.add_argument('--unknown-members', metavar='policy', default='abort',
help='how to handle unknown members of archive-style files ' +
'(policy should be abort, omit, or keep)')
help='how to handle unknown members of archive-style files (policy should' +
' be one of: ' + ', '.join([x.value for x in UnknownMemberPolicy]) + ')')
info = parser.add_mutually_exclusive_group()
@ -70,7 +71,7 @@ def show_meta(filename: str):
except UnicodeEncodeError:
print(" %s: harmful content" % k)
def clean_meta(params: Tuple[str, bool, str]) -> bool:
def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool:
filename, is_lightweight, unknown_member_policy = params
if not __check_file(filename, os.R_OK|os.W_OK):
return False
@ -137,15 +138,13 @@ def main():
return 0
else:
if args.unknown_members == 'keep':
unknown_member_policy = UnknownMemberPolicy(args.unknown_members)
if unknown_member_policy == UnknownMemberPolicy.KEEP:
logging.warning('Keeping unknown member files may leak metadata in the resulting file!')
elif args.unknown_members not in ['omit', 'abort']:
logging.warning('Undefined policy for handling unknown member files: "%s"',
args.unknown_members)
p = multiprocessing.Pool()
mode = (args.lightweight is True)
l = zip(__get_files_recursively(args.files), itertools.repeat(mode),
itertools.repeat(args.unknown_members))
itertools.repeat(unknown_member_policy))
ret = list(p.imap_unordered(clean_meta, list(l)))
return 0 if all(ret) else -1

View File

@ -4,28 +4,29 @@ import unittest
import shutil
import os
from libmat2 import office
from libmat2 import office, UnknownMemberPolicy
class TestPolicy(unittest.TestCase):
def test_policy_omit(self):
shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx')
p = office.MSOfficeParser('./tests/data/clean.docx')
p.unknown_member_policy = 'omit'
p.unknown_member_policy = UnknownMemberPolicy.OMIT
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.docx')
os.remove('./tests/data/clean.cleaned.docx')
def test_policy_keep(self):
shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx')
p = office.MSOfficeParser('./tests/data/clean.docx')
p.unknown_member_policy = 'keep'
p.unknown_member_policy = UnknownMemberPolicy.KEEP
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.docx')
os.remove('./tests/data/clean.cleaned.docx')
def test_policy_unknown(self):
shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx')
p = office.MSOfficeParser('./tests/data/clean.docx')
p.unknown_member_policy = 'unknown_policy_name_totally_invalid'
with self.assertRaises(ValueError):
p.remove_all()
p.unknown_member_policy = UnknownMemberPolicy('unknown_policy_name_totally_invalid')
os.remove('./tests/data/clean.docx')