1
0
Fork 0
mat2/mat2

152 lines
5.3 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env python3
2018-04-02 19:12:10 +02:00
import os
from typing import Tuple
import sys
2018-04-01 17:13:34 +02:00
import mimetypes
2018-03-06 23:20:18 +01:00
import argparse
2018-09-01 14:14:32 +02:00
import logging
2018-03-06 23:20:18 +01:00
try:
2018-09-06 11:13:11 +02:00
from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS
from libmat2 import check_dependencies, UnknownMemberPolicy
except ValueError as e:
print(e)
sys.exit(1)
2018-03-06 23:20:18 +01:00
2018-10-03 16:12:03 +02:00
# Version of MAT2, shown by `--version` and in the `--check-dependencies` report.
__version__ = '0.4.0'
2018-04-04 23:18:32 +02:00
2018-07-23 23:42:56 +02:00
def __check_file(filename: str, mode: int=os.R_OK) -> bool:
    """Check that `filename` is an existing regular file accessible with `mode`.

    A diagnostic is printed on stdout for the first check that fails.

    :param filename: path of the file to check.
    :param mode: `os.access` mode mask to test; defaults to `os.R_OK`.
    :return: True if every check passes, False otherwise.
    """
    if not os.path.exists(filename):
        print("[-] %s doesn't exist." % filename)
        return False
    if not os.path.isfile(filename):
        print("[-] %s is not a regular file." % filename)
        return False
    if not os.access(filename, mode):
        # Only name the permissions that were actually requested, so that a
        # read-only check doesn't complain about the file not being writeable.
        labels = ((os.R_OK, 'readable'), (os.W_OK, 'writeable'))
        wanted = [label for flag, label in labels if mode & flag]
        problem = ' and '.join(wanted) or 'accessible'
        print("[-] %s is not %s." % (filename, problem))
        return False
    return True
2018-03-06 23:20:18 +01:00
2018-03-06 23:20:18 +01:00
def create_arg_parser():
    """Build the command-line argument parser for MAT2.

    `--show` and `--lightweight` are mutually exclusive. The value given to
    `--unknown-members` is validated by argparse against the values of
    `UnknownMemberPolicy`, so an invalid policy is reported as a usage error
    instead of failing later when the policy is instantiated.

    :return: the configured `argparse.ArgumentParser`.
    """
    parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
    parser.add_argument('files', nargs='*', help='the files to process')
    parser.add_argument('-v', '--version', action='version',
                        version='MAT2 %s' % __version__)
    parser.add_argument('-l', '--list', action='store_true',
                        help='list all supported fileformats')
    parser.add_argument('--check-dependencies', action='store_true',
                        help='check if MAT2 has all the dependencies it needs')
    parser.add_argument('-V', '--verbose', action='store_true',
                        help='show more verbose status information')

    policies = [p.value for p in UnknownMemberPolicy]
    parser.add_argument('--unknown-members', metavar='policy', default='abort',
                        choices=policies,
                        help='how to handle unknown members of archive-style files (policy should' +
                        ' be one of: %s)' % ', '.join(policies))

    info = parser.add_mutually_exclusive_group()
    info.add_argument('-s', '--show', action='store_true',
                      help='list harmful metadata detectable by MAT2 without removing them')
    info.add_argument('-L', '--lightweight', action='store_true',
                      help='remove SOME metadata')

    return parser
2018-05-16 22:36:59 +02:00
def show_meta(filename: str):
    """Print the metadata found in `filename` on stdout.

    Nothing is shown if the file fails `__check_file` (a diagnostic is printed
    by that helper) or if no parser is available for its format.

    :param filename: path of the file to inspect.
    """
    if not __check_file(filename):
        return

    handler, mime = parser_factory.get_parser(filename)  # type: ignore
    if handler is None:
        print("[-] %s's format (%s) is not supported" % (filename, mime))
        return

    print("[+] Metadata for %s:" % filename)
    for name, value in handler.get_meta().items():
        try:  # FIXME this is ugly.
            line = " %s: %s" % (name, value)
            print(line)
        except UnicodeEncodeError:
            # The value can't be encoded for the output stream: show the key only.
            print(" %s: harmful content" % name)
2018-04-02 19:12:10 +02:00
def clean_meta(params: Tuple[str, bool, UnknownMemberPolicy]) -> bool:
    """Remove the metadata of a single file.

    :param params: a `(filename, is_lightweight, unknown_member_policy)`
        triple; `is_lightweight` selects `remove_all_lightweight()` instead of
        `remove_all()` on the parser.
    :return: False if the file isn't a readable and writeable regular file or
        if its format isn't supported; otherwise whatever the parser's removal
        method returns.
    """
    path, lightweight, policy = params
    if not __check_file(path, os.R_OK|os.W_OK):
        return False

    cleaner, mime = parser_factory.get_parser(path)  # type: ignore
    if cleaner is None:
        print("[-] %s's format (%s) is not supported" % (path, mime))
        return False

    cleaner.unknown_member_policy = policy
    remove = cleaner.remove_all_lightweight if lightweight else cleaner.remove_all
    return remove()
2018-03-06 23:20:18 +01:00
def show_parsers():
    """Print the supported mimetypes, each with its supported extensions.

    Both the lines and the extensions within a line are sorted, so the output
    is the same from one run to the next.
    """
    print('[+] Supported formats:')
    formats = []
    for parser in parser_factory._get_parsers():
        for mtype in parser.mimetypes:
            # `extension[1:]` skips the leading dot before the lookup in
            # UNSUPPORTED_EXTENSIONS.
            extensions = sorted({
                extension
                for extension in mimetypes.guess_all_extensions(mtype)
                if extension[1:] not in UNSUPPORTED_EXTENSIONS
            })
            if not extensions:
                # we're not supporting a single extension in the current
                # mimetype, so there is no point in showing the mimetype at all
                continue
            formats.append(' - %s (%s)' % (mtype, ', '.join(extensions)))
    print('\n'.join(sorted(formats)))
def __get_files_recursively(files):
    """Yield every usable file designated by `files`.

    Directories are walked with `os.walk`; other entries are taken as they
    are. Only paths accepted by `__check_file` are yielded.

    :param files: iterable of file and/or directory paths.
    """
    for entry in files:
        if not os.path.isdir(entry):
            if __check_file(entry):
                yield entry
            continue

        for dirpath, _dirnames, filenames in os.walk(entry):
            candidates = (os.path.join(dirpath, name) for name in filenames)
            yield from (path for path in candidates if __check_file(path))
2018-03-06 23:20:18 +01:00
def main():
    """Command-line entry point of MAT2.

    Without any file argument, handles `--list` and `--check-dependencies`,
    or prints the help. With files, either shows their metadata (`--show`) or
    cleans them.

    :return: 0, except when cleaning and `clean_meta` returned False for at
        least one file, in which case -1 is returned.
    """
    arg_parser = create_arg_parser()
    args = arg_parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.INFO)

    if not args.files:
        if args.list:
            show_parsers()
        elif args.check_dependencies:
            print("Dependencies required for MAT2 %s:" % __version__)
            for key, value in sorted(check_dependencies().items()):
                print('- %s: %s' % (key, 'yes' if value else 'no'))
        else:
            # print_help() returns None: don't use it as the return value, so
            # that every path of main() returns an int.
            arg_parser.print_help()
        return 0

    if args.show:
        for f in __get_files_recursively(args.files):
            show_meta(f)
        return 0

    unknown_member_policy = UnknownMemberPolicy(args.unknown_members)
    if unknown_member_policy == UnknownMemberPolicy.KEEP:
        logging.warning('Keeping unknown member files may leak metadata in the resulting file!')

    no_failure = True
    for f in __get_files_recursively(args.files):
        # clean_meta() is declared as taking a tuple, so give it one.
        # Only an explicit False counts as a failure.
        if clean_meta((f, args.lightweight, unknown_member_policy)) is False:
            no_failure = False
    return 0 if no_failure else -1
2018-09-06 11:13:11 +02:00
2018-03-06 23:20:18 +01:00
# Run the command-line interface only when executed as a script, and use the
# value returned by main() as the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)