parent
6e52661cfb
commit
697cb36b81
16
CHANGELOG.md
16
CHANGELOG.md
@ -67,12 +67,12 @@
|
||||
|
||||
# 0.3.1 - 2018-09-01
|
||||
|
||||
- Document how to install MAT2 for various distributions
|
||||
- Document how to install mat2 for various distributions
|
||||
- Fix various typos in the documentation/comments
|
||||
- Add ArchLinux to the CI to ensure that MAT2 is running on it
|
||||
- Add ArchLinux to the CI to ensure that mat2 is running on it
|
||||
- Fix the handling of files with a name ending in `.JPG`
|
||||
- Improve the detection of unsupported extensions in upper-case
|
||||
- Streamline MAT2's logging
|
||||
- Streamline mat2's logging
|
||||
|
||||
|
||||
# 0.3.0 - 2018-08-03
|
||||
@ -92,14 +92,14 @@
|
||||
- Simplify various code-paths
|
||||
- Remove superfluous debug message
|
||||
- Remove the `--check` option that never was implemented anyway
|
||||
- Add a `-c` option to check for MAT2's dependencies
|
||||
- Add a `-c` option to check for mat2's dependencies
|
||||
|
||||
|
||||
# 0.1.3 - 2018-07-06
|
||||
|
||||
- Improve MAT2 resilience against corrupted images
|
||||
- Improve mat2 resilience against corrupted images
|
||||
- Check that the minimal version of Poppler is available
|
||||
- Simplify how MAT2 deals with office files
|
||||
- Simplify how mat2 deals with office files
|
||||
- Improve cleaning of office files
|
||||
- Thumbnails are removed
|
||||
- Revisions are removed
|
||||
@ -111,8 +111,8 @@
|
||||
- Rename some files to ease the packaging
|
||||
- Add linters to the CI (mypy, bandit and pyflakes)
|
||||
- Prevent exiftool-related parameter injections
|
||||
- Improve MAT2's resilience against corrupted files
|
||||
- Make MAT2 work on fedora, thanks to @atenart
|
||||
- Improve mat2's resilience against corrupted files
|
||||
- Make mat2 work on fedora, thanks to @atenart
|
||||
- Tighten the threat model
|
||||
- Simplify and improve how office files are handled
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Contributing to MAT2
|
||||
# Contributing to mat2
|
||||
|
||||
The main repository for MAT2 is on [0xacab]( https://0xacab.org/jvoisin/mat2 ),
|
||||
The main repository for mat2 is on [0xacab]( https://0xacab.org/jvoisin/mat2 ),
|
||||
but you can send patches to jvoisin by [email](https://dustri.org/) if you prefer.
|
||||
|
||||
Do feel free to pick up [an issue]( https://0xacab.org/jvoisin/mat2/issues )
|
||||
@ -16,7 +16,7 @@ If you're adding a new fileformat, please add tests for:
|
||||
2. Cleaning metadata
|
||||
3. Raising `ValueError` upon a corrupted file
|
||||
|
||||
Since MAT2 is written in Python3, please conform as much as possible to the
|
||||
Since mat2 is written in Python3, please conform as much as possible to the
|
||||
[pep8]( https://pep8.org/ ) style; except where it makes no sense of course.
|
||||
|
||||
# Doing a release
|
||||
|
@ -14,7 +14,7 @@ pip3 install mat2
|
||||
## Optional dependencies
|
||||
|
||||
When [bubblewrap](https://github.com/projectatomic/bubblewrap) is
|
||||
installed, MAT2 uses it to sandbox any external processes it invokes.
|
||||
installed, mat2 uses it to sandbox any external processes it invokes.
|
||||
|
||||
## Arch Linux
|
||||
|
||||
@ -48,4 +48,4 @@ dnf -y install mat2 mat2-nautilus
|
||||
|
||||
## Gentoo
|
||||
|
||||
MAT2 is available in the [torbrowser overlay](https://github.com/MeisterP/torbrowser-overlay).
|
||||
mat2 is available in the [torbrowser overlay](https://github.com/MeisterP/torbrowser-overlay).
|
||||
|
22
README.md
22
README.md
@ -22,10 +22,10 @@ camera was used. Office documents like PDF or Office automatically adds
|
||||
author and company information to documents and spreadsheets.
|
||||
Maybe you don't want to disclose that information.
|
||||
|
||||
This is precisely the job of MAT2: getting rid, as much as possible, of
|
||||
This is precisely the job of mat2: getting rid, as much as possible, of
|
||||
metadata.
|
||||
|
||||
MAT2 provides both a command line tool, and a graphical user interface
|
||||
mat2 provides both a command line tool, and a graphical user interface
|
||||
via an extension for Nautilus, the default file manager of GNOME.
|
||||
|
||||
# Requirements
|
||||
@ -38,7 +38,7 @@ via an extension for Nautilus, the default file manager of GNOME.
|
||||
- `libimage-exiftool-perl` for everything else
|
||||
- `bubblewrap`, optionally, for sandboxing
|
||||
|
||||
Please note that MAT2 requires at least Python3.5.
|
||||
Please note that mat2 requires at least Python3.5.
|
||||
|
||||
# Running the test suite
|
||||
|
||||
@ -53,7 +53,7 @@ $ python3-coverage run --branch -m unittest discover -s tests/
|
||||
$ python3-coverage report --include -m --include /libmat2/*'
|
||||
```
|
||||
|
||||
# How to use MAT2
|
||||
# How to use mat2
|
||||
|
||||
```bash
|
||||
usage: mat2 [-h] [-v] [-l] [--check-dependencies] [-V]
|
||||
@ -69,25 +69,25 @@ optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-v, --version show program's version number and exit
|
||||
-l, --list list all supported fileformats
|
||||
--check-dependencies check if MAT2 has all the dependencies it needs
|
||||
--check-dependencies check if mat2 has all the dependencies it needs
|
||||
-V, --verbose show more verbose status information
|
||||
--unknown-members policy
|
||||
how to handle unknown members of archive-style files
|
||||
(policy should be one of: abort, omit, keep) [Default:
|
||||
abort]
|
||||
-s, --show list harmful metadata detectable by MAT2 without
|
||||
-s, --show list harmful metadata detectable by mat2 without
|
||||
removing them
|
||||
-L, --lightweight remove SOME metadata
|
||||
```
|
||||
|
||||
Note that MAT2 **will not** clean files in-place, but will produce, for
|
||||
Note that mat2 **will not** clean files in-place, but will produce, for
|
||||
example, with a file named "myfile.png" a cleaned version named
|
||||
"myfile.cleaned.png".
|
||||
|
||||
# Notes about detecting metadata
|
||||
|
||||
While MAT2 is doing its very best to display metadata when the `--show` flag is
|
||||
passed, it doesn't mean that a file is clean from any metadata if MAT2 doesn't
|
||||
While mat2 is doing its very best to display metadata when the `--show` flag is
|
||||
passed, it doesn't mean that a file is clean from any metadata if mat2 doesn't
|
||||
show any. There is no reliable way to detect every single possible metadata for
|
||||
complex file formats.
|
||||
|
||||
@ -138,14 +138,14 @@ You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Copyright 2018 Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org>
|
||||
Copyright 2016 Marie-Rose for MAT2's logo
|
||||
Copyright 2016 Marie-Rose for mat2's logo
|
||||
|
||||
The `tests/data/dirty_with_nsid.docx` file is licensed under GPLv3,
|
||||
and was borrowed from the Calibre project: https://calibre-ebook.com/downloads/demos/demo.docx
|
||||
|
||||
# Thanks
|
||||
|
||||
MAT2 wouldn't exist without:
|
||||
mat2 wouldn't exist without:
|
||||
|
||||
- the [Google Summer of Code](https://summerofcode.withgoogle.com/);
|
||||
- the fine people from [Tails]( https://tails.boum.org);
|
||||
|
@ -4,7 +4,7 @@ Implementation notes
|
||||
Lightweight cleaning mode
|
||||
-------------------------
|
||||
|
||||
Due to *popular* request, MAT2 is providing a *lightweight* cleaning mode,
|
||||
Due to *popular* request, mat2 is providing a *lightweight* cleaning mode,
|
||||
that only cleans the superficial metadata of your file, but not
|
||||
the ones that might be in **embedded** resources. Like for example,
|
||||
images in a PDF or an office document.
|
||||
@ -19,7 +19,7 @@ are entirely removed.
|
||||
deleted. For example journalists that are editing a document to erase
|
||||
mentions of their sources.
|
||||
|
||||
- Or they are aware of it, and will likely not expect MAT2 to be able to keep
|
||||
- Or they are aware of it, and will likely not expect mat2 to be able to keep
|
||||
the revisions, that are basically traces about how, when and who edited the
|
||||
document.
|
||||
|
||||
@ -27,15 +27,15 @@ are entirely removed.
|
||||
Race conditions
|
||||
---------------
|
||||
|
||||
MAT2 does its very best to avoid crashing at runtime. This is why it's checking
|
||||
if the file is valid __at parser creation__. MAT2 doesn't take any measure to
|
||||
mat2 does its very best to avoid crashing at runtime. This is why it's checking
|
||||
if the file is valid __at parser creation__. mat2 doesn't take any measure to
|
||||
ensure that the file is not changed between the time the parser is
|
||||
instantiated, and the call to clean or show the metadata.
|
||||
|
||||
Symlink attacks
|
||||
---------------
|
||||
|
||||
MAT2 output predictable filenames (like yourfile.jpg.cleaned).
|
||||
mat2 outputs predictable filenames (like yourfile.jpg.cleaned).
|
||||
This may lead to symlink attacks. Please check if your OS protects
|
||||
against them
|
||||
|
||||
@ -65,10 +65,10 @@ didn't remove any *deep metadata*, like the ones in embedded pictures. This was
|
||||
one of the reasons MAT was abandoned: the absence of a satisfying solution to
|
||||
handle PDF. But apparently, people are ok with [pdf redact
|
||||
tools](https://github.com/firstlookmedia/pdf-redact-tools), that simply
|
||||
transform the PDF into images. So this is what's MAT2 is doing too.
|
||||
transform the PDF into images. So this is what mat2 is doing too.
|
||||
|
||||
Of course, it would be possible to detect images in a PDF file, and process them
|
||||
with MAT2, but since a PDF can contain a lot of things, like images, videos,
|
||||
with mat2, but since a PDF can contain a lot of things, like images, videos,
|
||||
javascript, pdf, blobs, … this is the easiest and safest way to clean them.
|
||||
|
||||
Images handling
|
||||
@ -81,7 +81,7 @@ XML attacks
|
||||
-----------
|
||||
|
||||
Since our threat model conveniently excludes files crafted to specifically
|
||||
bypass MAT2, fileformats containing harmful XML are out of our scope.
|
||||
But since MAT2 is using [etree](https://docs.python.org/3/library/xml.html#xml-vulnerabilities)
|
||||
bypass mat2, fileformats containing harmful XML are out of our scope.
|
||||
But since mat2 is using [etree](https://docs.python.org/3/library/xml.html#xml-vulnerabilities)
|
||||
to process XML, it's "only" vulnerable to DoS, and not memory corruption:
|
||||
odds are that the user will notice that the cleaning didn't succeed.
|
||||
|
@ -1,4 +1,4 @@
|
||||
.TH MAT2 "1" "May 2019" "MAT2 0.9.0" "User Commands"
|
||||
.TH mat2 "1" "May 2019" "mat2 0.9.0" "User Commands"
|
||||
|
||||
.SH NAME
|
||||
mat2 \- the metadata anonymisation toolkit 2
|
||||
@ -32,7 +32,7 @@ show program's version number and exit
|
||||
list all supported fileformats
|
||||
.TP
|
||||
\fB\-\-check\-dependencies\fR
|
||||
check if MAT2 has all the dependencies it needs
|
||||
check if mat2 has all the dependencies it needs
|
||||
.TP
|
||||
\fB\-V\fR, \fB\-\-verbose\fR
|
||||
show more verbose status information
|
||||
@ -41,7 +41,7 @@ show more verbose status information
|
||||
how to handle unknown members of archive-style files (policy should be one of: abort, omit, keep)
|
||||
.TP
|
||||
\fB\-s\fR, \fB\-\-show\fR
|
||||
list harmful metadata detectable by MAT2 without
|
||||
list harmful metadata detectable by mat2 without
|
||||
removing them
|
||||
.TP
|
||||
\fB\-L\fR, \fB\-\-lightweight\fR
|
||||
|
@ -3,7 +3,7 @@ Threat Model
|
||||
|
||||
The Metadata Anonymisation Toolkit 2 adversary has a number
|
||||
of goals, capabilities, and counter-attack types that can be
|
||||
used to guide us towards a set of requirements for the MAT2.
|
||||
used to guide us towards a set of requirements for the mat2.
|
||||
|
||||
This is an overhaul of MAT's (the first iteration of the software) one.
|
||||
|
||||
@ -53,7 +53,7 @@ Adversary
|
||||
user. This is the strongest position for the adversary to
|
||||
have. In this case, the adversary is capable of inserting
|
||||
arbitrary, custom watermarks specifically for tracking
|
||||
the user. In general, MAT2 cannot defend against this
|
||||
the user. In general, mat2 cannot defend against this
|
||||
adversary, but we list it for completeness' sake.
|
||||
|
||||
- The adversary created the document for a group of users.
|
||||
@ -65,7 +65,7 @@ Adversary
|
||||
- The adversary did not create the document, the weakest
|
||||
position for the adversary to have. The file format is
|
||||
(most of the time) standard, nothing custom is added:
|
||||
MAT2 must be able to remove all metadata from the file.
|
||||
mat2 must be able to remove all metadata from the file.
|
||||
|
||||
|
||||
Requirements
|
||||
@ -73,28 +73,28 @@ Requirements
|
||||
|
||||
* Processing
|
||||
|
||||
- MAT2 *should* avoid interactions with information.
|
||||
- mat2 *should* avoid interactions with information.
|
||||
Its goal is to remove metadata, and the user is solely
|
||||
responsible for the information of the file.
|
||||
|
||||
- MAT2 *must* warn when encountering an unknown
|
||||
format. For example, in a zipfile, if MAT2 encounters an
|
||||
- mat2 *must* warn when encountering an unknown
|
||||
format. For example, in a zipfile, if mat2 encounters an
|
||||
unknown format, it should warn the user, and ask if the
|
||||
file should be added to the anonymised archive that is
|
||||
produced.
|
||||
|
||||
- MAT2 *must* not add metadata, since its purpose is to
|
||||
- mat2 *must* not add metadata, since its purpose is to
|
||||
anonymise files: every added item of metadata decreases
|
||||
anonymity.
|
||||
|
||||
- MAT2 *should* handle unknown/hidden metadata fields,
|
||||
- mat2 *should* handle unknown/hidden metadata fields,
|
||||
like proprietary extensions of open formats.
|
||||
|
||||
- MAT2 *must not* fail silently. Upon failure,
|
||||
MAT2 *must not* modify the file in any way.
|
||||
- mat2 *must not* fail silently. Upon failure,
|
||||
mat2 *must not* modify the file in any way.
|
||||
|
||||
- MAT2 *might* leak the fact that MAT2 was used on the file,
|
||||
- mat2 *might* leak the fact that mat2 was used on the file,
|
||||
since it might be uncommon for some file formats to come
|
||||
without any kind of metadata, an adversary might suspect that
|
||||
the user used MAT2 on certain files.
|
||||
the user used mat2 on certain files.
|
||||
|
||||
|
@ -52,7 +52,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
||||
self.member_class = None # type: ignore
|
||||
|
||||
# Those are the files that have a format that _isn't_
|
||||
# supported by MAT2, but that we want to keep anyway.
|
||||
# supported by mat2, but that we want to keep anyway.
|
||||
self.files_to_keep = set() # type: Set[Pattern]
|
||||
|
||||
# Those are the files that we _do not_ want to keep,
|
||||
|
@ -19,7 +19,7 @@ from . import abstract
|
||||
|
||||
poppler_version = Poppler.get_version()
|
||||
if LooseVersion(poppler_version) < LooseVersion('0.46'): # pragma: no cover
|
||||
raise ValueError("MAT2 needs at least Poppler version 0.46 to work. \
|
||||
raise ValueError("mat2 needs at least Poppler version 0.46 to work. \
|
||||
The installed version is %s." % poppler_version) # pragma: no cover
|
||||
|
||||
|
||||
|
@ -36,7 +36,7 @@ class TorrentParser(abstract.AbstractParser):
|
||||
class _BencodeHandler:
|
||||
"""
|
||||
Since bencode isn't that hard to parse,
|
||||
MAT2 comes with its own parser, based on the spec
|
||||
mat2 comes with its own parser, based on the spec
|
||||
https://wiki.theory.org/index.php/BitTorrentSpecification#Bencoding
|
||||
"""
|
||||
def __init__(self):
|
||||
|
8
mat2
8
mat2
@ -63,19 +63,19 @@ def create_arg_parser() -> argparse.ArgumentParser:
|
||||
excl_group.add_argument('files', nargs='*', help='the files to process',
|
||||
default=[])
|
||||
excl_group.add_argument('-v', '--version', action='version',
|
||||
version='MAT2 %s' % __version__)
|
||||
version='mat2 %s' % __version__)
|
||||
excl_group.add_argument('-l', '--list', action='store_true', default=False,
|
||||
help='list all supported fileformats')
|
||||
excl_group.add_argument('--check-dependencies', action='store_true',
|
||||
default=False,
|
||||
help='check if MAT2 has all the dependencies it '
|
||||
help='check if mat2 has all the dependencies it '
|
||||
'needs')
|
||||
|
||||
excl_group = parser.add_mutually_exclusive_group()
|
||||
excl_group.add_argument('-L', '--lightweight', action='store_true',
|
||||
help='remove SOME metadata')
|
||||
excl_group.add_argument('-s', '--show', action='store_true',
|
||||
help='list harmful metadata detectable by MAT2 '
|
||||
help='list harmful metadata detectable by mat2 '
|
||||
'without removing them')
|
||||
|
||||
return parser
|
||||
@ -190,7 +190,7 @@ def main() -> int:
|
||||
show_parsers()
|
||||
return 0
|
||||
elif args.check_dependencies:
|
||||
print("Dependencies for MAT2 %s:" % __version__)
|
||||
print("Dependencies for mat2 %s:" % __version__)
|
||||
for key, value in sorted(check_dependencies().items()):
|
||||
print('- %s: %s %s' % (key, 'yes' if value['found'] else 'no',
|
||||
'(optional)' if not value['required'] else ''))
|
||||
|
@ -235,7 +235,7 @@ class Mat2Extension(GObject.GObject, Nautilus.MenuProvider, Nautilus.LocationWid
|
||||
return None
|
||||
|
||||
item = Nautilus.MenuItem(
|
||||
name="MAT2::Remove_metadata",
|
||||
name="mat2::Remove_metadata",
|
||||
label="Remove metadata",
|
||||
tip="Remove metadata"
|
||||
)
|
||||
|
@ -40,14 +40,14 @@ class TestVersion(unittest.TestCase):
|
||||
def test_version(self):
|
||||
proc = subprocess.Popen(mat2_binary + ['--version'], stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertTrue(stdout.startswith(b'MAT2 '))
|
||||
self.assertTrue(stdout.startswith(b'mat2 '))
|
||||
|
||||
|
||||
class TestDependencies(unittest.TestCase):
|
||||
def test_dependencies(self):
|
||||
proc = subprocess.Popen(mat2_binary + ['--check-dependencies'], stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertTrue(b'MAT2' in stdout)
|
||||
self.assertTrue(b'mat2' in stdout)
|
||||
|
||||
|
||||
class TestReturnValue(unittest.TestCase):
|
||||
|
Loading…
Reference in New Issue
Block a user