Improve both the typing and the comments
This commit is contained in:
parent
d0f3534eff
commit
0d25b18d26
@ -9,7 +9,7 @@ bandit:
|
|||||||
script: # TODO: remove B405 and B314
|
script: # TODO: remove B405 and B314
|
||||||
- apt-get -qqy update
|
- apt-get -qqy update
|
||||||
- apt-get -qqy install --no-install-recommends python3-bandit
|
- apt-get -qqy install --no-install-recommends python3-bandit
|
||||||
- bandit ./mat2 --format txt
|
- bandit ./mat2 --format txt --skip B101
|
||||||
- bandit -r ./nautilus/ --format txt --skip B101
|
- bandit -r ./nautilus/ --format txt --skip B101
|
||||||
- bandit -r ./libmat2 --format txt --skip B101,B404,B603,B405,B314
|
- bandit -r ./libmat2 --format txt --skip B101,B404,B603,B405,B314
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import zipfile
|
import zipfile
|
||||||
from typing import Dict, Set, Pattern
|
from typing import Dict, Set, Pattern, Tuple
|
||||||
|
|
||||||
import xml.etree.ElementTree as ET # type: ignore
|
import xml.etree.ElementTree as ET # type: ignore
|
||||||
|
|
||||||
@ -14,9 +14,8 @@ from .archive import ArchiveBasedAbstractParser
|
|||||||
assert Set
|
assert Set
|
||||||
assert Pattern
|
assert Pattern
|
||||||
|
|
||||||
def _parse_xml(full_path: str):
|
def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]:
|
||||||
""" This function parses XML, with namespace support. """
|
""" This function parses XML, with namespace support. """
|
||||||
|
|
||||||
namespace_map = dict()
|
namespace_map = dict()
|
||||||
for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
|
for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
|
||||||
# The ns[0-9]+ namespaces are reserved for internal usage, so
|
# The ns[0-9]+ namespaces are reserved for internal usage, so
|
||||||
@ -183,20 +182,20 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
|
|||||||
|
|
||||||
parent_map = {c:p for p in tree.iter() for c in p}
|
parent_map = {c:p for p in tree.iter() for c in p}
|
||||||
|
|
||||||
elements = list()
|
elements_del = list()
|
||||||
for element in tree.iterfind('.//w:del', namespace):
|
for element in tree.iterfind('.//w:del', namespace):
|
||||||
elements.append(element)
|
elements_del.append(element)
|
||||||
for element in elements:
|
for element in elements_del:
|
||||||
parent_map[element].remove(element)
|
parent_map[element].remove(element)
|
||||||
|
|
||||||
elements = list()
|
elements_ins = list()
|
||||||
for element in tree.iterfind('.//w:ins', namespace):
|
for element in tree.iterfind('.//w:ins', namespace):
|
||||||
for position, item in enumerate(tree.iter()): # pragma: no cover
|
for position, item in enumerate(tree.iter()): # pragma: no cover
|
||||||
if item == element:
|
if item == element:
|
||||||
for children in element.iterfind('./*'):
|
for children in element.iterfind('./*'):
|
||||||
elements.append((element, position, children))
|
elements_ins.append((element, position, children))
|
||||||
break
|
break
|
||||||
for (element, position, children) in elements:
|
for (element, position, children) in elements_ins:
|
||||||
parent_map[element].insert(position, children)
|
parent_map[element].insert(position, children)
|
||||||
parent_map[element].remove(element)
|
parent_map[element].remove(element)
|
||||||
|
|
||||||
|
31
mat2
31
mat2
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from typing import Tuple
|
from typing import Tuple, Generator, List
|
||||||
import sys
|
import sys
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import argparse
|
import argparse
|
||||||
@ -16,6 +16,10 @@ except ValueError as e:
|
|||||||
|
|
||||||
__version__ = '0.4.0'
|
__version__ = '0.4.0'
|
||||||
|
|
||||||
|
# Make pyflakes happy
|
||||||
|
assert Tuple
|
||||||
|
|
||||||
|
|
||||||
def __check_file(filename: str, mode: int=os.R_OK) -> bool:
|
def __check_file(filename: str, mode: int=os.R_OK) -> bool:
|
||||||
if not os.path.exists(filename):
|
if not os.path.exists(filename):
|
||||||
print("[-] %s is doesn't exist." % filename)
|
print("[-] %s is doesn't exist." % filename)
|
||||||
@ -29,7 +33,7 @@ def __check_file(filename: str, mode: int=os.R_OK) -> bool:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def create_arg_parser():
|
def create_arg_parser() -> argparse.ArgumentParser:
|
||||||
parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
|
parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
|
||||||
parser.add_argument('files', nargs='*', help='the files to process')
|
parser.add_argument('files', nargs='*', help='the files to process')
|
||||||
parser.add_argument('-v', '--version', action='version',
|
parser.add_argument('-v', '--version', action='version',
|
||||||
@ -63,19 +67,18 @@ def show_meta(filename: str):
|
|||||||
return
|
return
|
||||||
|
|
||||||
print("[+] Metadata for %s:" % filename)
|
print("[+] Metadata for %s:" % filename)
|
||||||
meta = p.get_meta().items()
|
metadata = p.get_meta().items()
|
||||||
if not meta:
|
if not metadata:
|
||||||
print(" No metadata found")
|
print(" No metadata found")
|
||||||
return
|
return
|
||||||
|
|
||||||
for k, v in meta:
|
for k, v in metadata:
|
||||||
try: # FIXME this is ugly.
|
try: # FIXME this is ugly.
|
||||||
print(" %s: %s" % (k, v))
|
print(" %s: %s" % (k, v))
|
||||||
except UnicodeEncodeError:
|
except UnicodeEncodeError:
|
||||||
print(" %s: harmful content" % k)
|
print(" %s: harmful content" % k)
|
||||||
|
|
||||||
def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool:
|
def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy) -> bool:
|
||||||
filename, is_lightweight, unknown_member_policy = params
|
|
||||||
if not __check_file(filename, os.R_OK|os.W_OK):
|
if not __check_file(filename, os.R_OK|os.W_OK):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -83,7 +86,7 @@ def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool:
|
|||||||
if p is None:
|
if p is None:
|
||||||
print("[-] %s's format (%s) is not supported" % (filename, mtype))
|
print("[-] %s's format (%s) is not supported" % (filename, mtype))
|
||||||
return False
|
return False
|
||||||
p.unknown_member_policy = unknown_member_policy
|
p.unknown_member_policy = policy
|
||||||
if is_lightweight:
|
if is_lightweight:
|
||||||
return p.remove_all_lightweight()
|
return p.remove_all_lightweight()
|
||||||
return p.remove_all()
|
return p.remove_all()
|
||||||
@ -91,7 +94,7 @@ def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool:
|
|||||||
|
|
||||||
def show_parsers():
|
def show_parsers():
|
||||||
print('[+] Supported formats:')
|
print('[+] Supported formats:')
|
||||||
formats = list()
|
formats = set()
|
||||||
for parser in parser_factory._get_parsers():
|
for parser in parser_factory._get_parsers():
|
||||||
for mtype in parser.mimetypes:
|
for mtype in parser.mimetypes:
|
||||||
extensions = set()
|
extensions = set()
|
||||||
@ -102,11 +105,11 @@ def show_parsers():
|
|||||||
# we're not supporting a single extension in the current
|
# we're not supporting a single extension in the current
|
||||||
# mimetype, so there is no point in showing the mimetype at all
|
# mimetype, so there is no point in showing the mimetype at all
|
||||||
continue
|
continue
|
||||||
formats.append(' - %s (%s)' % (mtype, ', '.join(extensions)))
|
formats.add(' - %s (%s)' % (mtype, ', '.join(extensions)))
|
||||||
print('\n'.join(sorted(formats)))
|
print('\n'.join(sorted(formats)))
|
||||||
|
|
||||||
|
|
||||||
def __get_files_recursively(files):
|
def __get_files_recursively(files: List[str]) -> Generator[str, None, None]:
|
||||||
for f in files:
|
for f in files:
|
||||||
if os.path.isdir(f):
|
if os.path.isdir(f):
|
||||||
for path, _, _files in os.walk(f):
|
for path, _, _files in os.walk(f):
|
||||||
@ -141,13 +144,13 @@ def main():
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
else:
|
else:
|
||||||
unknown_member_policy = UnknownMemberPolicy(args.unknown_members)
|
policy = UnknownMemberPolicy(args.unknown_members)
|
||||||
if unknown_member_policy == UnknownMemberPolicy.KEEP:
|
if policy == UnknownMemberPolicy.KEEP:
|
||||||
logging.warning('Keeping unknown member files may leak metadata in the resulting file!')
|
logging.warning('Keeping unknown member files may leak metadata in the resulting file!')
|
||||||
|
|
||||||
no_failure = True
|
no_failure = True
|
||||||
for f in __get_files_recursively(args.files):
|
for f in __get_files_recursively(args.files):
|
||||||
if clean_meta([f, args.lightweight, unknown_member_policy]) is False:
|
if clean_meta(f, args.lightweight, policy) is False:
|
||||||
no_failure = False
|
no_failure = False
|
||||||
return 0 if no_failure is True else -1
|
return 0 if no_failure is True else -1
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user