From 697cb36b814d7e01da336c43b1932264302a2528 Mon Sep 17 00:00:00 2001 From: georg Date: Thu, 28 Nov 2019 02:15:20 +0000 Subject: [PATCH] This is mat2, not MAT2 Closes #131 --- CHANGELOG.md | 16 ++++++++-------- CONTRIBUTING.md | 6 +++--- INSTALL.md | 4 ++-- README.md | 22 +++++++++++----------- doc/implementation_notes.md | 18 +++++++++--------- doc/mat2.1 | 6 +++--- doc/threat_model.md | 24 ++++++++++++------------ libmat2/archive.py | 2 +- libmat2/pdf.py | 2 +- libmat2/torrent.py | 2 +- mat2 | 8 ++++---- nautilus/mat2.py | 2 +- tests/test_climat2.py | 4 ++-- 13 files changed, 58 insertions(+), 58 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 941c0a8..3cd83e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,12 +67,12 @@ # 0.3.1 - 2018-09-01 -- Document how to install MAT2 for various distributions +- Document how to install mat2 for various distributions - Fix various typos in the documentation/comments -- Add ArchLinux to the CI to ensure that MAT2 is running on it +- Add ArchLinux to the CI to ensure that mat2 is running on it - Fix the handling of files with a name ending in `.JPG` - Improve the detection of unsupported extensions in upper-case -- Streamline MAT2's logging +- Streamline mat2's logging # 0.3.0 - 2018-08-03 @@ -92,14 +92,14 @@ - Simplify various code-paths - Remove superfluous debug message - Remove the `--check` option that never was implemented anyway -- Add a `-c` option to check for MAT2's dependencies +- Add a `-c` option to check for mat2's dependencies # 0.1.3 - 2018-07-06 -- Improve MAT2 resilience against corrupted images +- Improve mat2 resilience against corrupted images - Check that the minimal version of Poppler is available -- Simplify how MAT2 deals with office files +- Simplify how mat2 deals with office files - Improve cleaning of office files - Thumbnails are removed - Revisions are removed @@ -111,8 +111,8 @@ - Rename some files to ease the packaging - Add linters to the CI (mypy, bandit and pyflakes) - 
Prevent exitftool-related parameters injections -- Improve MAT2's resilience against corrupted files -- Make MAT2 work on fedora, thanks to @atenart +- Improve mat2's resilience against corrupted files +- Make mat2 work on fedora, thanks to @atenart - Tighten the threat model - Simplify and improve how office files are handled diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ed8013c..34dfe23 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ -# Contributing to MAT2 +# Contributing to mat2 -The main repository for MAT2 is on [0xacab]( https://0xacab.org/jvoisin/mat2 ), +The main repository for mat2 is on [0xacab]( https://0xacab.org/jvoisin/mat2 ), but you can send patches to jvoisin by [email](https://dustri.org/) if you prefer. Do feel free to pick up [an issue]( https://0xacab.org/jvoisin/mat2/issues ) @@ -16,7 +16,7 @@ If you're adding a new fileformat, please add tests for: 2. Cleaning metadata 3. Raising `ValueError` upon a corrupted file -Since MAT2 is written in Python3, please conform as much as possible to the +Since mat2 is written in Python3, please conform as much as possible to the [pep8]( https://pep8.org/ ) style; except where it makes no sense of course. # Doing a release diff --git a/INSTALL.md b/INSTALL.md index fc66d9d..0503539 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -14,7 +14,7 @@ pip3 install mat2 ## Optional dependencies When [bubblewrap](https://github.com/projectatomic/bubblewrap) is -installed, MAT2 uses it to sandbox any external processes it invokes. +installed, mat2 uses it to sandbox any external processes it invokes. ## Arch Linux @@ -48,4 +48,4 @@ dnf -y install mat2 mat2-nautilus ## Gentoo -MAT2 is available in the [torbrowser overlay](https://github.com/MeisterP/torbrowser-overlay). +mat2 is available in the [torbrowser overlay](https://github.com/MeisterP/torbrowser-overlay). diff --git a/README.md b/README.md index 33d0296..e0866b4 100644 --- a/README.md +++ b/README.md @@ -22,10 +22,10 @@ camera was used. 
Office documents like PDF or Office automatically adds author and company information to documents and spreadsheets. Maybe you don't want to disclose those information. -This is precisely the job of MAT2: getting rid, as much as possible, of +This is precisely the job of mat2: getting rid, as much as possible, of metadata. -MAT2 provides both a command line tool, and a graphical user interface +mat2 provides both a command line tool, and a graphical user interface via an extension for Nautilus, the default file manager of GNOME. # Requirements @@ -38,7 +38,7 @@ via an extension for Nautilus, the default file manager of GNOME. - `libimage-exiftool-perl` for everything else - `bubblewrap`, optionally, for sandboxing -Please note that MAT2 requires at least Python3.5. +Please note that mat2 requires at least Python3.5. # Running the test suite @@ -53,7 +53,7 @@ $ python3-coverage run --branch -m unittest discover -s tests/ $ python3-coverage report --include -m --include /libmat2/*' ``` -# How to use MAT2 +# How to use mat2 ```bash usage: mat2 [-h] [-v] [-l] [--check-dependencies] [-V] @@ -69,25 +69,25 @@ optional arguments: -h, --help show this help message and exit -v, --version show program's version number and exit -l, --list list all supported fileformats - --check-dependencies check if MAT2 has all the dependencies it needs + --check-dependencies check if mat2 has all the dependencies it needs -V, --verbose show more verbose status information --unknown-members policy how to handle unknown members of archive-style files (policy should be one of: abort, omit, keep) [Default: abort] - -s, --show list harmful metadata detectable by MAT2 without + -s, --show list harmful metadata detectable by mat2 without removing them -L, --lightweight remove SOME metadata ``` -Note that MAT2 **will not** clean files in-place, but will produce, for +Note that mat2 **will not** clean files in-place, but will produce, for example, with a file named "myfile.png" a cleaned version 
named "myfile.cleaned.png". # Notes about detecting metadata -While MAT2 is doing its very best to display metadata when the `--show` flag is -passed, it doesn't mean that a file is clean from any metadata if MAT2 doesn't +While mat2 is doing its very best to display metadata when the `--show` flag is +passed, it doesn't mean that a file is clean from any metadata if mat2 doesn't show any. There is no reliable way to detect every single possible metadata for complex file formats. @@ -138,14 +138,14 @@ You should have received a copy of the GNU Lesser General Public License along with this program. If not, see . Copyright 2018 Julien (jvoisin) Voisin -Copyright 2016 Marie-Rose for MAT2's logo +Copyright 2016 Marie-Rose for mat2's logo The `tests/data/dirty_with_nsid.docx` file is licensed under GPLv3, and was borrowed from the Calibre project: https://calibre-ebook.com/downloads/demos/demo.docx # Thanks -MAT2 wouldn't exist without: +mat2 wouldn't exist without: - the [Google Summer of Code](https://summerofcode.withgoogle.com/); - the fine people from [Tails]( https://tails.boum.org); diff --git a/doc/implementation_notes.md b/doc/implementation_notes.md index 7555d2e..e298646 100644 --- a/doc/implementation_notes.md +++ b/doc/implementation_notes.md @@ -4,7 +4,7 @@ Implementation notes Lightweight cleaning mode ------------------------- -Due to *popular* request, MAT2 is providing a *lightweight* cleaning mode, +Due to *popular* request, mat2 is providing a *lightweight* cleaning mode, that only cleans the superficial metadata of your file, but not the ones that might be in **embedded** resources. Like for example, images in a PDF or an office document. @@ -19,7 +19,7 @@ are entirely removed. deleted. For example journalists that are editing a document to erase mentions sources mentions. 
-- Or they are aware of it, and will likely not expect MAT2 to be able to keep +- Or they are aware of it, and will likely not expect mat2 to be able to keep the revisions, that are basically traces about how, when and who edited the document. @@ -27,15 +27,15 @@ are entirely removed. Race conditions --------------- -MAT2 does its very best to avoid crashing at runtime. This is why it's checking -if the file is valid __at parser creation__. MAT2 doesn't take any measure to +mat2 does its very best to avoid crashing at runtime. This is why it's checking +if the file is valid __at parser creation__. mat2 doesn't take any measure to ensure that the file is not changed between the time the parser is instantiated, and the call to clean or show the metadata. Symlink attacks --------------- -MAT2 output predictable filenames (like yourfile.jpg.cleaned). +mat2 outputs predictable filenames (like yourfile.cleaned.jpg). This may lead to symlink attack. Please check if you OS prevent against them @@ -65,10 +65,10 @@ didn't remove any *deep metadata*, like the ones in embedded pictures. This was on of the reason MAT was abandoned: the absence of satisfying solution to handle PDF. But apparently, people are ok with [pdf redact tools](https://github.com/firstlookmedia/pdf-redact-tools), that simply -transform the PDF into images. So this is what's MAT2 is doing too. +transform the PDF into images. So this is what mat2 is doing too. Of course, it would be possible to detect images in PDf file, and process them -with MAT2, but since a PDF can contain a lot of things, like images, videos, +with mat2, but since a PDF can contain a lot of things, like images, videos, javascript, pdf, blobs, … this is the easiest and safest way to clean them. Images handling @@ -81,7 +81,7 @@ XML attacks ----------- Since our threat model conveniently excludes files crafted to specifically -bypass MAT2, fileformats containing harmful XML are out of our scope. 
-But since MAT2 is using [etree](https://docs.python.org/3/library/xml.html#xml-vulnerabilities) +bypass mat2, fileformats containing harmful XML are out of our scope. +But since mat2 is using [etree](https://docs.python.org/3/library/xml.html#xml-vulnerabilities) to process XML, it's "only" vulnerable to DoS, and not memory corruption: odds are that the user will notice that the cleaning didn't succeed. diff --git a/doc/mat2.1 b/doc/mat2.1 index c63b46b..c03842d 100644 --- a/doc/mat2.1 +++ b/doc/mat2.1 @@ -1,4 +1,4 @@ -.TH MAT2 "1" "May 2019" "MAT2 0.9.0" "User Commands" +.TH mat2 "1" "May 2019" "mat2 0.9.0" "User Commands" .SH NAME mat2 \- the metadata anonymisation toolkit 2 @@ -32,7 +32,7 @@ show program's version number and exit list all supported fileformats .TP \fB\-\-check\-dependencies\fR -check if MAT2 has all the dependencies it needs +check if mat2 has all the dependencies it needs .TP \fB\-V\fR, \fB\-\-verbose\fR show more verbose status information @@ -41,7 +41,7 @@ show more verbose status information how to handle unknown members of archive-style files (policy should be one of: abort, omit, keep) .TP \fB\-s\fR, \fB\-\-show\fR -list harmful metadata detectable by MAT2 without +list harmful metadata detectable by mat2 without removing them .TP \fB\-L\fR, \fB\-\-lightweight\fR diff --git a/doc/threat_model.md b/doc/threat_model.md index 31bfe91..8b97c67 100644 --- a/doc/threat_model.md +++ b/doc/threat_model.md @@ -3,7 +3,7 @@ Threat Model The Metadata Anonymisation Toolkit 2 adversary has a number of goals, capabilities, and counter-attack types that can be -used to guide us towards a set of requirements for the MAT2. +used to guide us towards a set of requirements for mat2. This is an overhaul of MAT's (the first iteration of the software) one. @@ -53,7 +53,7 @@ Adversary user. This is the strongest position for the adversary to have. 
In this case, the adversary is capable of inserting arbitrary, custom watermarks specifically for tracking - the user. In general, MAT2 cannot defend against this + the user. In general, mat2 cannot defend against this adversary, but we list it for completeness' sake. - The adversary created the document for a group of users. @@ -65,7 +65,7 @@ Adversary - The adversary did not create the document, the weakest position for the adversary to have. The file format is (most of the time) standard, nothing custom is added: - MAT2 must be able to remove all metadata from the file. + mat2 must be able to remove all metadata from the file. Requirements @@ -73,28 +73,28 @@ Requirements * Processing - - MAT2 *should* avoid interactions with information. + - mat2 *should* avoid interactions with information. Its goal is to remove metadata, and the user is solely responsible for the information of the file. - - MAT2 *must* warn when encountering an unknown - format. For example, in a zipfile, if MAT2 encounters an + - mat2 *must* warn when encountering an unknown + format. For example, in a zipfile, if mat2 encounters an unknown format, it should warn the user, and ask if the file should be added to the anonymised archive that is produced. - - MAT2 *must* not add metadata, since its purpose is to + - mat2 *must* not add metadata, since its purpose is to anonymise files: every added items of metadata decreases anonymity. - - MAT2 *should* handle unknown/hidden metadata fields, + - mat2 *should* handle unknown/hidden metadata fields, like proprietary extensions of open formats. - - MAT2 *must not* fail silently. Upon failure, - MAT2 *must not* modify the file in any way. + - mat2 *must not* fail silently. Upon failure, + mat2 *must not* modify the file in any way. 
- - MAT2 *might* leak the fact that MAT2 was used on the file, + - mat2 *might* leak the fact that mat2 was used on the file, since it might be uncommon for some file formats to come without any kind of metadata, an adversary might suspect that - the user used MAT2 on certain files. + the user used mat2 on certain files. diff --git a/libmat2/archive.py b/libmat2/archive.py index e715fb1..de80a35 100644 --- a/libmat2/archive.py +++ b/libmat2/archive.py @@ -52,7 +52,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): self.member_class = None # type: ignore # Those are the files that have a format that _isn't_ - # supported by MAT2, but that we want to keep anyway. + # supported by mat2, but that we want to keep anyway. self.files_to_keep = set() # type: Set[Pattern] # Those are the files that we _do not_ want to keep, diff --git a/libmat2/pdf.py b/libmat2/pdf.py index b9f7c99..547e071 100644 --- a/libmat2/pdf.py +++ b/libmat2/pdf.py @@ -19,7 +19,7 @@ from . import abstract poppler_version = Poppler.get_version() if LooseVersion(poppler_version) < LooseVersion('0.46'): # pragma: no cover - raise ValueError("MAT2 needs at least Poppler version 0.46 to work. \ + raise ValueError("mat2 needs at least Poppler version 0.46 to work. \ The installed version is %s." 
% poppler_version) # pragma: no cover diff --git a/libmat2/torrent.py b/libmat2/torrent.py index e2c82b7..1a82740 100644 --- a/libmat2/torrent.py +++ b/libmat2/torrent.py @@ -36,7 +36,7 @@ class TorrentParser(abstract.AbstractParser): class _BencodeHandler: """ Since bencode isn't that hard to parse, - MAT2 comes with its own parser, based on the spec + mat2 comes with its own parser, based on the spec https://wiki.theory.org/index.php/BitTorrentSpecification#Bencoding """ def __init__(self): diff --git a/mat2 b/mat2 index ec09637..d6b2255 100755 --- a/mat2 +++ b/mat2 @@ -63,19 +63,19 @@ def create_arg_parser() -> argparse.ArgumentParser: excl_group.add_argument('files', nargs='*', help='the files to process', default=[]) excl_group.add_argument('-v', '--version', action='version', - version='MAT2 %s' % __version__) + version='mat2 %s' % __version__) excl_group.add_argument('-l', '--list', action='store_true', default=False, help='list all supported fileformats') excl_group.add_argument('--check-dependencies', action='store_true', default=False, - help='check if MAT2 has all the dependencies it ' + help='check if mat2 has all the dependencies it ' 'needs') excl_group = parser.add_mutually_exclusive_group() excl_group.add_argument('-L', '--lightweight', action='store_true', help='remove SOME metadata') excl_group.add_argument('-s', '--show', action='store_true', - help='list harmful metadata detectable by MAT2 ' + help='list harmful metadata detectable by mat2 ' 'without removing them') return parser @@ -190,7 +190,7 @@ def main() -> int: show_parsers() return 0 elif args.check_dependencies: - print("Dependencies for MAT2 %s:" % __version__) + print("Dependencies for mat2 %s:" % __version__) for key, value in sorted(check_dependencies().items()): print('- %s: %s %s' % (key, 'yes' if value['found'] else 'no', '(optional)' if not value['required'] else '')) diff --git a/nautilus/mat2.py b/nautilus/mat2.py index d476d55..fdba725 100644 --- a/nautilus/mat2.py +++ 
b/nautilus/mat2.py @@ -235,7 +235,7 @@ class Mat2Extension(GObject.GObject, Nautilus.MenuProvider, Nautilus.LocationWid return None item = Nautilus.MenuItem( - name="MAT2::Remove_metadata", + name="mat2::Remove_metadata", label="Remove metadata", tip="Remove metadata" ) diff --git a/tests/test_climat2.py b/tests/test_climat2.py index da790d0..8ab7cbd 100644 --- a/tests/test_climat2.py +++ b/tests/test_climat2.py @@ -40,14 +40,14 @@ class TestVersion(unittest.TestCase): def test_version(self): proc = subprocess.Popen(mat2_binary + ['--version'], stdout=subprocess.PIPE) stdout, _ = proc.communicate() - self.assertTrue(stdout.startswith(b'MAT2 ')) + self.assertTrue(stdout.startswith(b'mat2 ')) class TestDependencies(unittest.TestCase): def test_dependencies(self): proc = subprocess.Popen(mat2_binary + ['--check-dependencies'], stdout=subprocess.PIPE) stdout, _ = proc.communicate() - self.assertTrue(b'MAT2' in stdout) + self.assertTrue(b'mat2' in stdout) class TestReturnValue(unittest.TestCase):