2018-04-04 00:21:39 +02:00
|
|
|
#!/usr/bin/python3
|
2018-04-04 23:15:00 +02:00
|
|
|
|
2018-04-02 19:12:10 +02:00
|
|
|
import os
|
2018-04-01 17:13:34 +02:00
|
|
|
import mimetypes
|
2018-03-06 23:20:18 +01:00
|
|
|
import argparse
|
2018-04-14 16:10:45 +02:00
|
|
|
from threading import Thread
|
|
|
|
import multiprocessing
|
|
|
|
from queue import Queue
|
2018-03-06 23:20:18 +01:00
|
|
|
|
2018-03-19 23:43:49 +01:00
|
|
|
from src import parser_factory
|
2018-03-06 23:20:18 +01:00
|
|
|
|
2018-04-04 23:18:32 +02:00
|
|
|
|
2018-04-02 23:40:00 +02:00
|
|
|
def __check_file(filename:str, mode:int = os.R_OK) -> bool:
    """Tell whether `filename` is a regular file accessible with `mode`.

    A diagnostic line is printed on stdout when the check fails.

    :param filename: path of the file to check.
    :param mode: bitmask of `os.R_OK`, `os.W_OK` and `os.X_OK`, handed to
        `os.access`. Defaults to read access only.
    :return: True when the path is a regular file and `os.access` grants
        `mode`, False otherwise.
    """
    if not os.path.isfile(filename):
        print("[-] %s is not a regular file." % filename)
        return False

    if not os.access(filename, mode):
        # Only name the permissions that were actually requested, so that a
        # read-only check does not complain about the file being unwriteable.
        labels = ((os.R_OK, 'readable'), (os.W_OK, 'writeable'), (os.X_OK, 'executable'))
        wanted = [label for flag, label in labels if mode & flag]
        print("[-] %s is not %s." % (filename, ' and '.join(wanted) or 'accessible'))
        return False

    return True
|
2018-03-06 23:20:18 +01:00
|
|
|
|
2018-04-04 23:21:48 +02:00
|
|
|
|
2018-03-06 23:20:18 +01:00
|
|
|
def create_arg_parser():
    """Build the command-line parser of the tool.

    The parser accepts any number of positional `files` and four boolean
    switches, all grouped under the 'Information' heading.

    :return: the configured `argparse.ArgumentParser`.
    """
    cli = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
    cli.add_argument('files', nargs='*')

    # (short flag, long flag, help text), registered in this order.
    switches = (
        ('-c', '--check', 'check if a file is free of harmful metadatas'),
        ('-l', '--list', 'list all supported fileformats'),
        ('-s', '--show', 'list all the harmful metadata of a file without removing them'),
        ('-L', '--lightweight', 'remove SOME metadata'),
    )
    group = cli.add_argument_group('Information')
    for short_flag, long_flag, description in switches:
        group.add_argument(short_flag, long_flag, action='store_true', help=description)

    return cli
|
|
|
|
|
2018-04-04 23:21:48 +02:00
|
|
|
|
2018-04-01 17:13:34 +02:00
|
|
|
def show_meta(filename:str):
    """Print the metadata that the matching parser reports for `filename`.

    Nothing is listed when the file fails the `__check_file` test or when
    `parser_factory.get_parser` returns no parser for it.

    :param filename: path of the file to inspect.
    """
    usable = __check_file(filename)
    if not usable:
        return

    handler, mimetype = parser_factory.get_parser(filename)
    if handler is None:
        print("[-] %s's format (%s) is not supported" % (filename, mimetype))
        return

    print("[+] Metadata for %s:" % filename)
    metadata = handler.get_meta()
    for name, value in metadata.items():
        try:  # FIXME this is ugly.
            print(" %s: %s" % (name, value))
        except UnicodeEncodeError:
            # The value could not be encoded while printing; show a
            # placeholder instead of aborting the whole listing.
            print(" %s: harmful content" % name)
|
2018-04-02 19:12:10 +02:00
|
|
|
|
2018-04-04 23:21:48 +02:00
|
|
|
|
2018-04-23 00:11:25 +02:00
|
|
|
def clean_meta(filename:str, is_lightweigth:bool) -> bool:
    """Remove the metadata of `filename` using the matching parser.

    :param filename: path of the file to clean; it must be a regular file
        that is both readable and writeable.
    :param is_lightweigth: when true, call the parser's
        `remove_all_lightweight()` instead of `remove_all()`.
    :return: False when the file fails the access check or has no parser,
        otherwise whatever the parser's removal method returns.
    """
    if not __check_file(filename, os.R_OK|os.W_OK):
        # Honour the declared `bool` return type instead of returning None.
        return False

    p, mtype = parser_factory.get_parser(filename)
    if p is None:
        print("[-] %s's format (%s) is not supported" % (filename, mtype))
        return False

    if is_lightweigth:
        return p.remove_all_lightweight()
    return p.remove_all()
|
2018-03-06 23:20:18 +01:00
|
|
|
|
2018-04-04 23:21:48 +02:00
|
|
|
|
2018-04-04 23:15:00 +02:00
|
|
|
def show_parsers():
    """Print each mimetype declared by the available parsers.

    Every mimetype is followed by the file extensions that the standard
    `mimetypes` module associates with it.
    """
    print('[+] Supported formats:')
    for handler in parser_factory._get_parsers():
        for mimetype in handler.mimetypes:
            known_extensions = mimetypes.guess_all_extensions(mimetype)
            print(' - %s (%s)' % (mimetype, ', '.join(known_extensions)))
|
|
|
|
|
2018-04-04 23:21:48 +02:00
|
|
|
|
2018-04-04 23:15:00 +02:00
|
|
|
def __get_files_recursively(files):
    """Yield the path of every file designated by `files`.

    Entries that are regular files are yielded unchanged; any other entry is
    walked with `os.walk` and each file found below it is yielded, joined to
    the directory it was found in.

    :param files: iterable of paths.
    """
    for entry in files:
        if not os.path.isfile(entry):
            for root, _dirs, names in os.walk(entry):
                yield from (os.path.join(root, name) for name in names)
            continue
        yield entry
|
|
|
|
|
2018-04-14 21:23:31 +02:00
|
|
|
def __do_clean_async(is_lightweigth, q):
    """Worker loop: clean every filename pulled from `q`.

    The loop ends, and the function returns, as soon as `None` is taken
    from the queue; `task_done()` is called after each cleaned file.

    :param is_lightweigth: forwarded to `clean_meta` for every file.
    :param q: queue of filenames, with `None` used as the stop marker.
    """
    # iter() keeps calling q.get() until it hands back the None sentinel.
    for filename in iter(q.get, None):
        clean_meta(filename, is_lightweigth)
        q.task_done()
|
|
|
|
|
2018-04-04 23:15:00 +02:00
|
|
|
|
2018-03-06 23:20:18 +01:00
|
|
|
def main():
    """Entry point: parse the command line and dispatch the requested action.

    - without files: list the supported formats if `--list` was given,
      otherwise print the help;
    - with files and `--show`: print the metadata of each file;
    - otherwise: clean every file, using one worker thread per CPU.
    """
    cli = create_arg_parser()
    options = cli.parse_args()

    if not options.files:
        if options.list:
            show_parsers()
            return
        return cli.print_help()

    if options.show:
        for path in __get_files_recursively(options.files):
            show_meta(path)
        return

    # NOTE(review): `options.check` is never read, and `options.list` is only
    # read when no file is given, so those invocations end up cleaning the
    # files as well -- confirm that this is intended.

    # Thread the cleaning
    lightweight = (options.lightweight is True)
    pending = Queue(maxsize=0)
    for path in __get_files_recursively(options.files):
        pending.put(path)

    worker_count = multiprocessing.cpu_count()
    workers = []
    for _ in range(worker_count):
        thread = Thread(target=__do_clean_async, args=(lightweight, pending))
        thread.start()
        workers.append(thread)

    for _ in range(worker_count):
        pending.put(None)  # stop the threads

    for thread in workers:
        thread.join()
|
2018-03-06 23:20:18 +01:00
|
|
|
|
|
|
|
|
|
|
|
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
|