1
0
mirror of synced 2024-11-25 02:34:24 +01:00

Improve both the typing and the comments

This commit is contained in:
jvoisin 2018-10-05 17:00:59 +02:00
parent d0f3534eff
commit 0d25b18d26
3 changed files with 26 additions and 24 deletions

View File

@ -9,7 +9,7 @@ bandit:
script: # TODO: remove B405 and B314 script: # TODO: remove B405 and B314
- apt-get -qqy update - apt-get -qqy update
- apt-get -qqy install --no-install-recommends python3-bandit - apt-get -qqy install --no-install-recommends python3-bandit
- bandit ./mat2 --format txt - bandit ./mat2 --format txt --skip B101
- bandit -r ./nautilus/ --format txt --skip B101 - bandit -r ./nautilus/ --format txt --skip B101
- bandit -r ./libmat2 --format txt --skip B101,B404,B603,B405,B314 - bandit -r ./libmat2 --format txt --skip B101,B404,B603,B405,B314

View File

@ -2,7 +2,7 @@ import logging
import os import os
import re import re
import zipfile import zipfile
from typing import Dict, Set, Pattern from typing import Dict, Set, Pattern, Tuple
import xml.etree.ElementTree as ET # type: ignore import xml.etree.ElementTree as ET # type: ignore
@ -14,9 +14,8 @@ from .archive import ArchiveBasedAbstractParser
assert Set assert Set
assert Pattern assert Pattern
def _parse_xml(full_path: str): def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]:
""" This function parses XML, with namespace support. """ """ This function parses XML, with namespace support. """
namespace_map = dict() namespace_map = dict()
for _, (key, value) in ET.iterparse(full_path, ("start-ns", )): for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
# The ns[0-9]+ namespaces are reserved for internal usage, so # The ns[0-9]+ namespaces are reserved for internal usage, so
@ -183,20 +182,20 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
parent_map = {c:p for p in tree.iter() for c in p} parent_map = {c:p for p in tree.iter() for c in p}
elements = list() elements_del = list()
for element in tree.iterfind('.//w:del', namespace): for element in tree.iterfind('.//w:del', namespace):
elements.append(element) elements_del.append(element)
for element in elements: for element in elements_del:
parent_map[element].remove(element) parent_map[element].remove(element)
elements = list() elements_ins = list()
for element in tree.iterfind('.//w:ins', namespace): for element in tree.iterfind('.//w:ins', namespace):
for position, item in enumerate(tree.iter()): # pragma: no cover for position, item in enumerate(tree.iter()): # pragma: no cover
if item == element: if item == element:
for children in element.iterfind('./*'): for children in element.iterfind('./*'):
elements.append((element, position, children)) elements_ins.append((element, position, children))
break break
for (element, position, children) in elements: for (element, position, children) in elements_ins:
parent_map[element].insert(position, children) parent_map[element].insert(position, children)
parent_map[element].remove(element) parent_map[element].remove(element)

31
mat2
View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import os import os
from typing import Tuple from typing import Tuple, Generator, List
import sys import sys
import mimetypes import mimetypes
import argparse import argparse
@ -16,6 +16,10 @@ except ValueError as e:
__version__ = '0.4.0' __version__ = '0.4.0'
# Make pyflakes happy
assert Tuple
def __check_file(filename: str, mode: int=os.R_OK) -> bool: def __check_file(filename: str, mode: int=os.R_OK) -> bool:
if not os.path.exists(filename): if not os.path.exists(filename):
print("[-] %s is doesn't exist." % filename) print("[-] %s is doesn't exist." % filename)
@ -29,7 +33,7 @@ def __check_file(filename: str, mode: int=os.R_OK) -> bool:
return True return True
def create_arg_parser(): def create_arg_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2') parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
parser.add_argument('files', nargs='*', help='the files to process') parser.add_argument('files', nargs='*', help='the files to process')
parser.add_argument('-v', '--version', action='version', parser.add_argument('-v', '--version', action='version',
@ -63,19 +67,18 @@ def show_meta(filename: str):
return return
print("[+] Metadata for %s:" % filename) print("[+] Metadata for %s:" % filename)
meta = p.get_meta().items() metadata = p.get_meta().items()
if not meta: if not metadata:
print(" No metadata found") print(" No metadata found")
return return
for k, v in meta: for k, v in metadata:
try: # FIXME this is ugly. try: # FIXME this is ugly.
print(" %s: %s" % (k, v)) print(" %s: %s" % (k, v))
except UnicodeEncodeError: except UnicodeEncodeError:
print(" %s: harmful content" % k) print(" %s: harmful content" % k)
def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool: def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy) -> bool:
filename, is_lightweight, unknown_member_policy = params
if not __check_file(filename, os.R_OK|os.W_OK): if not __check_file(filename, os.R_OK|os.W_OK):
return False return False
@ -83,7 +86,7 @@ def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool:
if p is None: if p is None:
print("[-] %s's format (%s) is not supported" % (filename, mtype)) print("[-] %s's format (%s) is not supported" % (filename, mtype))
return False return False
p.unknown_member_policy = unknown_member_policy p.unknown_member_policy = policy
if is_lightweight: if is_lightweight:
return p.remove_all_lightweight() return p.remove_all_lightweight()
return p.remove_all() return p.remove_all()
@ -91,7 +94,7 @@ def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool:
def show_parsers(): def show_parsers():
print('[+] Supported formats:') print('[+] Supported formats:')
formats = list() formats = set()
for parser in parser_factory._get_parsers(): for parser in parser_factory._get_parsers():
for mtype in parser.mimetypes: for mtype in parser.mimetypes:
extensions = set() extensions = set()
@ -102,11 +105,11 @@ def show_parsers():
# we're not supporting a single extension in the current # we're not supporting a single extension in the current
# mimetype, so there is no point in showing the mimetype at all # mimetype, so there is no point in showing the mimetype at all
continue continue
formats.append(' - %s (%s)' % (mtype, ', '.join(extensions))) formats.add(' - %s (%s)' % (mtype, ', '.join(extensions)))
print('\n'.join(sorted(formats))) print('\n'.join(sorted(formats)))
def __get_files_recursively(files): def __get_files_recursively(files: List[str]) -> Generator[str, None, None]:
for f in files: for f in files:
if os.path.isdir(f): if os.path.isdir(f):
for path, _, _files in os.walk(f): for path, _, _files in os.walk(f):
@ -141,13 +144,13 @@ def main():
return 0 return 0
else: else:
unknown_member_policy = UnknownMemberPolicy(args.unknown_members) policy = UnknownMemberPolicy(args.unknown_members)
if unknown_member_policy == UnknownMemberPolicy.KEEP: if policy == UnknownMemberPolicy.KEEP:
logging.warning('Keeping unknown member files may leak metadata in the resulting file!') logging.warning('Keeping unknown member files may leak metadata in the resulting file!')
no_failure = True no_failure = True
for f in __get_files_recursively(args.files): for f in __get_files_recursively(args.files):
if clean_meta([f, args.lightweight, unknown_member_policy]) is False: if clean_meta(f, args.lightweight, policy) is False:
no_failure = False no_failure = False
return 0 if no_failure is True else -1 return 0 if no_failure is True else -1