From 3a070b0ab70c4d4a456bdd12d0cd490ad127e320 Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Thu, 25 Oct 2018 11:56:46 +0200
Subject: [PATCH] Add support for zip files

---
 libmat2/archive.py            |  5 +++++
 tests/test_corrupted_files.py | 15 +++++++++++++++
 tests/test_libmat2.py         | 35 ++++++++++++++++++++++++++++++++++-
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/libmat2/archive.py b/libmat2/archive.py
index b4700c3..bcf8d33 100644
--- a/libmat2/archive.py
+++ b/libmat2/archive.py
@@ -157,3 +157,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
             os.remove(self.output_filename)
             return False
         return True
+
+
+
+class ZipParser(ArchiveBasedAbstractParser):
+    mimetypes = {'application/zip'}
diff --git a/tests/test_corrupted_files.py b/tests/test_corrupted_files.py
index 181d4d2..e7d3c2a 100644
--- a/tests/test_corrupted_files.py
+++ b/tests/test_corrupted_files.py
@@ -4,6 +4,7 @@ import unittest
 import shutil
 import os
 import logging
+import zipfile
 
 from libmat2 import pdf, images, audio, office, parser_factory, torrent
 from libmat2 import harmless, video
@@ -222,3 +223,17 @@ class TestCorruptedFiles(unittest.TestCase):
         p = video.AVIParser('./tests/data/--output.avi')
         self.assertFalse(p.remove_all())
         os.remove('./tests/data/--output.avi')
+
+    def test_zip(self):
+        with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
+            zout.write('./tests/data/dirty.flac')
+            zout.write('./tests/data/dirty.docx')
+            zout.write('./tests/data/dirty.jpg')
+            zout.write('./tests/data/embedded_corrupted.docx')
+        p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip')
+        self.assertEqual(mimetype, 'application/zip')
+        meta = p.get_meta()
+        self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
+        self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
+        self.assertFalse(p.remove_all())
+        os.remove('./tests/data/dirty.zip')
diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py
index 46d6aaa..1602480 100644
--- a/tests/test_libmat2.py
+++ b/tests/test_libmat2.py
@@ -6,7 +6,7 @@ import os
 import zipfile
 
 from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless
-from libmat2 import check_dependencies, video
+from libmat2 import check_dependencies, video, archive
 
 
 class TestCheckDependencies(unittest.TestCase):
@@ -153,6 +153,18 @@ class TestGetMeta(unittest.TestCase):
         meta = p.get_meta()
         self.assertEqual(meta, {})
 
+    def test_zip(self):
+        with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
+            zout.write('./tests/data/dirty.flac')
+            zout.write('./tests/data/dirty.docx')
+            zout.write('./tests/data/dirty.jpg')
+        p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip')
+        self.assertEqual(mimetype, 'application/zip')
+        meta = p.get_meta()
+        self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
+        self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
+        os.remove('./tests/data/dirty.zip')
+
 
 class TestRemovingThumbnails(unittest.TestCase):
     def test_odt(self):
@@ -488,3 +500,24 @@ class TestCleaning(unittest.TestCase):
         os.remove('./tests/data/clean.avi')
         os.remove('./tests/data/clean.cleaned.avi')
         os.remove('./tests/data/clean.cleaned.cleaned.avi')
+
+    def test_zip(self):
+        with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
+            zout.write('./tests/data/dirty.flac')
+            zout.write('./tests/data/dirty.docx')
+            zout.write('./tests/data/dirty.jpg')
+        p = archive.ZipParser('./tests/data/dirty.zip')
+        meta = p.get_meta()
+        self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
+
+        ret = p.remove_all()
+        self.assertTrue(ret)
+
+        p = archive.ZipParser('./tests/data/dirty.cleaned.zip')
+        self.assertEqual(p.get_meta(), {})
+        self.assertTrue(p.remove_all())
+
+        os.remove('./tests/data/dirty.zip')
+        os.remove('./tests/data/dirty.cleaned.zip')
+        os.remove('./tests/data/dirty.cleaned.cleaned.zip')
+