Improve both the typing and the comments
This commit is contained in:
parent
d0f3534eff
commit
0d25b18d26
@ -9,7 +9,7 @@ bandit:
|
||||
script: # TODO: remove B405 and B314
|
||||
- apt-get -qqy update
|
||||
- apt-get -qqy install --no-install-recommends python3-bandit
|
||||
- bandit ./mat2 --format txt
|
||||
- bandit ./mat2 --format txt --skip B101
|
||||
- bandit -r ./nautilus/ --format txt --skip B101
|
||||
- bandit -r ./libmat2 --format txt --skip B101,B404,B603,B405,B314
|
||||
|
||||
|
@ -2,7 +2,7 @@ import logging
|
||||
import os
|
||||
import re
|
||||
import zipfile
|
||||
from typing import Dict, Set, Pattern
|
||||
from typing import Dict, Set, Pattern, Tuple
|
||||
|
||||
import xml.etree.ElementTree as ET # type: ignore
|
||||
|
||||
@ -14,9 +14,8 @@ from .archive import ArchiveBasedAbstractParser
|
||||
assert Set
|
||||
assert Pattern
|
||||
|
||||
def _parse_xml(full_path: str):
|
||||
def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]:
|
||||
""" This function parses XML, with namespace support. """
|
||||
|
||||
namespace_map = dict()
|
||||
for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
|
||||
# The ns[0-9]+ namespaces are reserved for internal usage, so
|
||||
@ -183,20 +182,20 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
|
||||
|
||||
parent_map = {c:p for p in tree.iter() for c in p}
|
||||
|
||||
elements = list()
|
||||
elements_del = list()
|
||||
for element in tree.iterfind('.//w:del', namespace):
|
||||
elements.append(element)
|
||||
for element in elements:
|
||||
elements_del.append(element)
|
||||
for element in elements_del:
|
||||
parent_map[element].remove(element)
|
||||
|
||||
elements = list()
|
||||
elements_ins = list()
|
||||
for element in tree.iterfind('.//w:ins', namespace):
|
||||
for position, item in enumerate(tree.iter()): # pragma: no cover
|
||||
if item == element:
|
||||
for children in element.iterfind('./*'):
|
||||
elements.append((element, position, children))
|
||||
elements_ins.append((element, position, children))
|
||||
break
|
||||
for (element, position, children) in elements:
|
||||
for (element, position, children) in elements_ins:
|
||||
parent_map[element].insert(position, children)
|
||||
parent_map[element].remove(element)
|
||||
|
||||
|
31
mat2
31
mat2
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
from typing import Tuple
|
||||
from typing import Tuple, Generator, List
|
||||
import sys
|
||||
import mimetypes
|
||||
import argparse
|
||||
@ -16,6 +16,10 @@ except ValueError as e:
|
||||
|
||||
__version__ = '0.4.0'
|
||||
|
||||
# Make pyflakes happy
|
||||
assert Tuple
|
||||
|
||||
|
||||
def __check_file(filename: str, mode: int=os.R_OK) -> bool:
|
||||
if not os.path.exists(filename):
|
||||
print("[-] %s is doesn't exist." % filename)
|
||||
@ -29,7 +33,7 @@ def __check_file(filename: str, mode: int=os.R_OK) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def create_arg_parser():
|
||||
def create_arg_parser() -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
|
||||
parser.add_argument('files', nargs='*', help='the files to process')
|
||||
parser.add_argument('-v', '--version', action='version',
|
||||
@ -63,19 +67,18 @@ def show_meta(filename: str):
|
||||
return
|
||||
|
||||
print("[+] Metadata for %s:" % filename)
|
||||
meta = p.get_meta().items()
|
||||
if not meta:
|
||||
metadata = p.get_meta().items()
|
||||
if not metadata:
|
||||
print(" No metadata found")
|
||||
return
|
||||
|
||||
for k, v in meta:
|
||||
for k, v in metadata:
|
||||
try: # FIXME this is ugly.
|
||||
print(" %s: %s" % (k, v))
|
||||
except UnicodeEncodeError:
|
||||
print(" %s: harmful content" % k)
|
||||
|
||||
def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool:
|
||||
filename, is_lightweight, unknown_member_policy = params
|
||||
def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy) -> bool:
|
||||
if not __check_file(filename, os.R_OK|os.W_OK):
|
||||
return False
|
||||
|
||||
@ -83,7 +86,7 @@ def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool:
|
||||
if p is None:
|
||||
print("[-] %s's format (%s) is not supported" % (filename, mtype))
|
||||
return False
|
||||
p.unknown_member_policy = unknown_member_policy
|
||||
p.unknown_member_policy = policy
|
||||
if is_lightweight:
|
||||
return p.remove_all_lightweight()
|
||||
return p.remove_all()
|
||||
@ -91,7 +94,7 @@ def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool:
|
||||
|
||||
def show_parsers():
|
||||
print('[+] Supported formats:')
|
||||
formats = list()
|
||||
formats = set()
|
||||
for parser in parser_factory._get_parsers():
|
||||
for mtype in parser.mimetypes:
|
||||
extensions = set()
|
||||
@ -102,11 +105,11 @@ def show_parsers():
|
||||
# we're not supporting a single extension in the current
|
||||
# mimetype, so there is no point in showing the mimetype at all
|
||||
continue
|
||||
formats.append(' - %s (%s)' % (mtype, ', '.join(extensions)))
|
||||
formats.add(' - %s (%s)' % (mtype, ', '.join(extensions)))
|
||||
print('\n'.join(sorted(formats)))
|
||||
|
||||
|
||||
def __get_files_recursively(files):
|
||||
def __get_files_recursively(files: List[str]) -> Generator[str, None, None]:
|
||||
for f in files:
|
||||
if os.path.isdir(f):
|
||||
for path, _, _files in os.walk(f):
|
||||
@ -141,13 +144,13 @@ def main():
|
||||
return 0
|
||||
|
||||
else:
|
||||
unknown_member_policy = UnknownMemberPolicy(args.unknown_members)
|
||||
if unknown_member_policy == UnknownMemberPolicy.KEEP:
|
||||
policy = UnknownMemberPolicy(args.unknown_members)
|
||||
if policy == UnknownMemberPolicy.KEEP:
|
||||
logging.warning('Keeping unknown member files may leak metadata in the resulting file!')
|
||||
|
||||
no_failure = True
|
||||
for f in __get_files_recursively(args.files):
|
||||
if clean_meta([f, args.lightweight, unknown_member_policy]) is False:
|
||||
if clean_meta(f, args.lightweight, policy) is False:
|
||||
no_failure = False
|
||||
return 0 if no_failure is True else -1
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user