#!/usr/bin/env python3
"""Command-line front end for MAT2, the metadata anonymisation toolkit."""

import os
from typing import Generator, Iterable, Tuple
import sys
import mimetypes
import argparse
import logging

try:
    from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS
    from libmat2 import check_dependencies, UnknownMemberPolicy
except ValueError as e:
    # Report a ValueError raised while importing libmat2 as a plain message
    # and a non-zero exit status rather than a traceback.
    print(e)
    sys.exit(1)

__version__ = '0.4.0'


def __check_file(filename: str, mode: int = os.R_OK) -> bool:
    """Return True if `filename` is an existing regular file accessible with `mode`.

    `mode` is a bitmask of `os.R_OK` / `os.W_OK` as accepted by `os.access`.
    On failure, a message explaining why is printed and False is returned.
    """
    if not os.path.exists(filename):
        print("[-] %s doesn't exist." % filename)
        return False
    elif not os.path.isfile(filename):
        print("[-] %s is not a regular file." % filename)
        return False
    elif not os.access(filename, mode):
        # Only mention the permissions that were actually requested.
        wanted = []
        if mode & os.R_OK:
            wanted.append('readable')
        if mode & os.W_OK:
            wanted.append('writeable')
        print("[-] %s is not %s." % (filename, ' and '.join(wanted) or 'accessible'))
        return False
    return True


def create_arg_parser() -> argparse.ArgumentParser:
    """Build and return the argument parser for the command-line interface."""
    parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
    parser.add_argument('files', nargs='*', help='the files to process')
    parser.add_argument('-v', '--version', action='version',
                        version='MAT2 %s' % __version__)
    parser.add_argument('-l', '--list', action='store_true',
                        help='list all supported fileformats')
    parser.add_argument('--check-dependencies', action='store_true',
                        help='check if MAT2 has all the dependencies it needs')
    parser.add_argument('-V', '--verbose', action='store_true',
                        help='show more verbose status information')
    parser.add_argument('--unknown-members', metavar='policy', default='abort',
                        help='how to handle unknown members of archive-style files (policy should' +
                        ' be one of: %s)' % ', '.join(p.value for p in UnknownMemberPolicy))

    # --show and --lightweight cannot be combined.
    info = parser.add_mutually_exclusive_group()
    info.add_argument('-s', '--show', action='store_true',
                      help='list harmful metadata detectable by MAT2 without removing them')
    info.add_argument('-L', '--lightweight', action='store_true',
                      help='remove SOME metadata')

    return parser


def show_meta(filename: str) -> None:
    """Print the metadata found in `filename`, one `key: value` pair per line.

    Nothing but an error message is printed if the file is not a readable
    regular file or if no parser is available for its format.
    """
    if not __check_file(filename):
        return

    p, mtype = parser_factory.get_parser(filename)  # type: ignore
    if p is None:
        print("[-] %s's format (%s) is not supported" % (filename, mtype))
        return

    print("[+] Metadata for %s:" % filename)
    for k, v in p.get_meta().items():
        try:  # FIXME this is ugly.
            print(" %s: %s" % (k, v))
        except UnicodeEncodeError:
            # The value cannot be encoded for the output stream.
            print(" %s: harmful content" % k)


def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool:
    """Remove the metadata from a single file.

    `params` is a `(filename, is_lightweight, unknown_member_policy)` tuple.
    Returns False if the file is not readable and writeable or its format is
    unsupported; otherwise returns whatever the parser's removal method returns.
    """
    filename, is_lightweight, unknown_member_policy = params
    if not __check_file(filename, os.R_OK | os.W_OK):
        return False

    p, mtype = parser_factory.get_parser(filename)  # type: ignore
    if p is None:
        print("[-] %s's format (%s) is not supported" % (filename, mtype))
        return False

    p.unknown_member_policy = unknown_member_policy
    if is_lightweight:
        return p.remove_all_lightweight()
    return p.remove_all()


def show_parsers() -> None:
    """Print the supported mimetypes and their extensions, sorted."""
    print('[+] Supported formats:')
    formats = []
    for parser in parser_factory._get_parsers():
        for mtype in parser.mimetypes:
            extensions = set()
            for extension in mimetypes.guess_all_extensions(mtype):
                if extension[1:] not in UNSUPPORTED_EXTENSIONS:  # skip the dot
                    extensions.add(extension)
            if not extensions:
                # we're not supporting a single extension in the current
                # mimetype, so there is no point in showing the mimetype at all
                continue
            # Sort the extensions: set iteration order is not stable across runs.
            formats.append(' - %s (%s)' % (mtype, ', '.join(sorted(extensions))))
    print('\n'.join(sorted(formats)))


def __get_files_recursively(files: Iterable[str]) -> Generator[str, None, None]:
    """Yield every usable file path from `files`, walking into directories.

    Each candidate is checked with `__check_file`; those failing the check
    are reported by it and skipped.
    """
    for f in files:
        if os.path.isdir(f):
            for path, _, _files in os.walk(f):
                for _f in _files:
                    fname = os.path.join(path, _f)
                    if __check_file(fname):
                        yield fname
        elif __check_file(f):
            yield f


def main() -> int:
    """Run the command-line interface and return the process exit status.

    Returns 0 on success and -1 if cleaning at least one file failed. An
    invalid `--unknown-members` value ends the program through
    `ArgumentParser.error`.
    """
    arg_parser = create_arg_parser()
    args = arg_parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.INFO)

    if not args.files:
        if args.list:
            show_parsers()
        elif args.check_dependencies:
            print("Dependencies required for MAT2 %s:" % __version__)
            for key, value in sorted(check_dependencies().items()):
                print('- %s: %s' % (key, 'yes' if value else 'no'))
        else:
            arg_parser.print_help()
        return 0

    if args.show:
        for f in __get_files_recursively(args.files):
            show_meta(f)
        return 0

    try:
        unknown_member_policy = UnknownMemberPolicy(args.unknown_members)
    except ValueError:
        # Unknown policy name: report it as a usage error instead of letting
        # the ValueError surface as a traceback. error() does not return.
        arg_parser.error(
            "invalid policy for --unknown-members: %r (should be one of: %s)"
            % (args.unknown_members, ', '.join(p.value for p in UnknownMemberPolicy)))

    if unknown_member_policy == UnknownMemberPolicy.KEEP:
        logging.warning('Keeping unknown member files may leak metadata in the resulting file!')

    no_failure = True
    for f in __get_files_recursively(args.files):
        # Only an explicit False counts as a failure, as before.
        if clean_meta((f, args.lightweight, unknown_member_policy)) is False:
            no_failure = False
    return 0 if no_failure else -1


if __name__ == '__main__':
    sys.exit(main())