mat2/tests/test_corrupted_files.py

#!/usr/bin/env python3

import unittest
import shutil
import os
import logging
import zipfile

from libmat2 import pdf, images, audio, office, parser_factory, torrent
from libmat2 import harmless, video, web

# No need to log messages: should something go wrong,
# the testsuite _will_ fail.
logger = logging.getLogger()
logger.setLevel(logging.FATAL)


class TestInexistentFiles(unittest.TestCase):
    """Paths for which `parser_factory.get_parser` must return nothing.

    Every test asserts that both the parser and the mimetype returned by
    `get_parser` are `None`.
    """

    def _assert_no_parser(self, path):
        """Assert that `get_parser(path)` yields neither mimetype nor parser."""
        parser, mimetype = parser_factory.get_parser(path)
        self.assertEqual(mimetype, None)
        self.assertEqual(parser, None)

    def test_ro(self):
        """`/etc/passwd` (presumably read-only for the user running the tests)."""
        self._assert_no_parser('/etc/passwd')

    def test_notaccessible(self):
        """`/etc/shadow` (presumably unreadable for the user running the tests)."""
        self._assert_no_parser('/etc/shadow')

    def test_folder(self):
        """A directory instead of a file."""
        self._assert_no_parser('./tests/')

    def test_inexistingfile(self):
        """A path that does not exist."""
        self._assert_no_parser('./tests/NONEXISTING_FILE')

    def test_chardevice(self):
        """The `/dev/zero` device node."""
        self._assert_no_parser('/dev/zero')

    def test_brokensymlink(self):
        """A symbolic link whose target has been removed."""
        target = './tests/clean.py'
        link = './tests/SYMLINK'

        shutil.copy('./tests/test_libmat2.py', target)
        try:
            # NOTE(review): a relative link target is resolved against the
            # directory holding the link, so this link may never have pointed
            # at the copy made above. It is dangling either way once the copy
            # is gone -- confirm whether the target should be 'clean.py'.
            os.symlink(target, link)
            # Registered as soon as the link exists: if an assertion below
            # fails, a leftover link would make the next run's os.symlink()
            # raise FileExistsError.
            self.addCleanup(os.unlink, link)
        finally:
            # The copy has to be gone before the parser is requested, and it
            # must not be left behind if os.symlink() itself fails.
            os.remove(target)

        self._assert_no_parser(link)

class TestUnsupportedFiles(unittest.TestCase):
    """A file whose mimetype is recognised but for which no parser is given."""

    def test_pdf(self):
        """A copy of a Python source file: mimetype reported, parser `None`."""
        target = './tests/clean.py'
        shutil.copy('./tests/test_libmat2.py', target)
        # Remove the copy even when one of the assertions below fails.
        self.addCleanup(os.remove, target)

        # Hand the factory the very file copied above, so that the test runs
        # against a file that actually exists.
        parser, mimetype = parser_factory.get_parser(target)
        self.assertEqual(mimetype, 'text/x-python')
        self.assertEqual(parser, None)

class TestCorruptedEmbedded(unittest.TestCase):
    """Office documents taken from the `embedded_corrupted.*` fixtures.

    For both formats `remove_all()` is expected to return a falsy value while
    `get_meta()` still returns something.
    """

    def test_docx(self):
        """`embedded_corrupted.docx`: cleaning fails, metadata is not `None`."""
        target = './tests/data/clean.docx'
        shutil.copy('./tests/data/embedded_corrupted.docx', target)
        # Remove the scratch copy even when an assertion below fails.
        self.addCleanup(os.remove, target)

        parser, _ = parser_factory.get_parser(target)
        self.assertFalse(parser.remove_all())
        self.assertIsNotNone(parser.get_meta())

    def test_odt(self):
        """`embedded_corrupted.odt`: cleaning fails, metadata is non-empty."""
        target = './tests/data/clean.odt'
        shutil.copy('./tests/data/embedded_corrupted.odt', target)
        # Remove the scratch copy even when an assertion below fails.
        self.addCleanup(os.remove, target)

        parser, _ = parser_factory.get_parser(target)
        self.assertFalse(parser.remove_all())
        self.assertTrue(parser.get_meta())


class TestExplicitelyUnsupportedFiles(unittest.TestCase):
    """A `.py` file placed in `tests/data`: mimetype reported, no parser."""

    def test_pdf(self):
        """`get_parser` returns `(None, 'text/x-python')` for a Python file."""
        target = './tests/data/clean.py'
        shutil.copy('./tests/test_libmat2.py', target)
        # Remove the scratch copy even when an assertion below fails.
        self.addCleanup(os.remove, target)

        parser, mimetype = parser_factory.get_parser(target)
        self.assertEqual(mimetype, 'text/x-python')
        self.assertEqual(parser, None)


class TestWrongContentTypesFileOffice(unittest.TestCase):
    """`MSOfficeParser` against the `*content_types.docx` fixtures.

    Judging by the fixture names, each one carries a defective or missing
    content-types part -- TODO confirm against the fixtures themselves.
    """

    def _scratch_copy(self, source):
        """Copy `source` to the scratch docx, schedule its removal, return it."""
        target = './tests/data/clean.docx'
        shutil.copy(source, target)
        # Runs after the test whether it passed or not, so no copy is left
        # behind in tests/data when an assertion fails.
        self.addCleanup(os.remove, target)
        return target

    def test_office_incomplete(self):
        """The parser can be built, but `remove_all()` reports failure."""
        target = self._scratch_copy('./tests/data/malformed_content_types.docx')
        p = office.MSOfficeParser(target)
        self.assertIsNotNone(p)
        self.assertFalse(p.remove_all())

    def test_office_broken(self):
        """Building the parser raises `ValueError`."""
        target = self._scratch_copy('./tests/data/broken_xml_content_types.docx')
        with self.assertRaises(ValueError):
            office.MSOfficeParser(target)

    def test_office_absent(self):
        """Building the parser raises `ValueError`."""
        target = self._scratch_copy('./tests/data/no_content_types.docx')
        with self.assertRaises(ValueError):
            office.MSOfficeParser(target)

class TestCorruptedFiles(unittest.TestCase):
    """Hand each parser a file whose content does not match its extension.

    Scratch files are scheduled for removal as soon as they exist, so they do
    not pile up in the tests directory when an assertion fails.
    """

    def _scratch_copy(self, source, target):
        """Copy `source` to `target`, schedule `target` for removal, return it."""
        shutil.copy(source, target)
        self.addCleanup(os.remove, target)
        return target

    def _require_ffmpeg(self):
        """Skip the running test when `video._get_ffmpeg_path()` fails."""
        try:
            video._get_ffmpeg_path()
        except RuntimeError:
            self.skipTest('video._get_ffmpeg_path() raised RuntimeError')

    def test_pdf(self):
        """`PDFParser` rejects a copy of `dirty.png`."""
        target = self._scratch_copy('./tests/data/dirty.png', './tests/data/clean.png')
        with self.assertRaises(ValueError):
            pdf.PDFParser(target)

    def test_png(self):
        """`PNGParser` rejects a copy of `dirty.pdf`."""
        target = self._scratch_copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
        with self.assertRaises(ValueError):
            images.PNGParser(target)

    def test_png2(self):
        """No parser is returned for Python source renamed to `.png`."""
        target = self._scratch_copy('./tests/test_libmat2.py', './tests/clean.png')
        parser, _ = parser_factory.get_parser(target)
        self.assertIsNone(parser)

    def test_torrent(self):
        """`TorrentParser` rejects a png and several hand-written payloads."""
        target = self._scratch_copy('./tests/data/dirty.png', './tests/data/clean.torrent')
        with self.assertRaises(ValueError):
            torrent.TorrentParser(target)

        with open(target, "a") as f:
            f.write("trailing garbage")
        with self.assertRaises(ValueError):
            torrent.TorrentParser(target)

        # Each payload replaces the whole file content before parsing.
        for payload in ("i-0e", "i00e", "01:AAAAAAAAA", "1:aaa"):
            with self.subTest(payload=payload):
                with open(target, "w") as f:
                    f.write(payload)
                with self.assertRaises(ValueError):
                    torrent.TorrentParser(target)

    def test_odg(self):
        """`LibreOfficeParser` rejects a copy of `dirty.png`."""
        target = self._scratch_copy('./tests/data/dirty.png', './tests/data/clean.odg')
        with self.assertRaises(ValueError):
            office.LibreOfficeParser(target)

    def test_bmp(self):
        """`HarmlessParser` can be built on a copy of `dirty.png`."""
        target = self._scratch_copy('./tests/data/dirty.png', './tests/data/clean.bmp')
        ret = harmless.HarmlessParser(target)
        self.assertIsNotNone(ret)

    def test_docx(self):
        """`MSOfficeParser` rejects a copy of `dirty.png`."""
        target = self._scratch_copy('./tests/data/dirty.png', './tests/data/clean.docx')
        with self.assertRaises(ValueError):
            office.MSOfficeParser(target)

    def test_flac(self):
        """`FLACParser` rejects a copy of `dirty.png`."""
        target = self._scratch_copy('./tests/data/dirty.png', './tests/data/clean.flac')
        with self.assertRaises(ValueError):
            audio.FLACParser(target)

    def test_mp3(self):
        """`MP3Parser` rejects a copy of `dirty.png`."""
        target = self._scratch_copy('./tests/data/dirty.png', './tests/data/clean.mp3')
        with self.assertRaises(ValueError):
            audio.MP3Parser(target)

    def test_jpg(self):
        """`JPGParser` rejects a copy of `dirty.mp3`."""
        target = self._scratch_copy('./tests/data/dirty.mp3', './tests/data/clean.jpg')
        with self.assertRaises(ValueError):
            images.JPGParser(target)

    # This test used to begin with a bare `return`, so it was reported as
    # passing without running anything. Marking it as skipped keeps it
    # disabled while making that visible in the test report.
    @unittest.skip('disabled: the test body was never executed')
    def test_png_lightweight(self):
        """`PNGParser` cleaning a copy of `dirty.torrent` (currently disabled)."""
        target = self._scratch_copy('./tests/data/dirty.torrent', './tests/data/clean.png')
        p = images.PNGParser(target)
        self.assertTrue(p.remove_all())

    def test_avi(self):
        """`AVIParser.remove_all()` reports failure on a copy of `dirty.torrent`."""
        self._require_ffmpeg()

        target = self._scratch_copy('./tests/data/dirty.torrent', './tests/data/clean.avi')
        p = video.AVIParser(target)
        self.assertFalse(p.remove_all())

    def test_avi_injection(self):
        """Same as `test_avi`, with a file name that starts with `--`."""
        self._require_ffmpeg()

        target = self._scratch_copy('./tests/data/dirty.torrent', './tests/data/--output.avi')
        p = video.AVIParser(target)
        self.assertFalse(p.remove_all())

    def test_zip(self):
        """Metadata of archive members is readable, but cleaning fails."""
        archive = './tests/data/clean.zip'
        members = (
            './tests/data/dirty.flac',
            './tests/data/dirty.docx',
            './tests/data/dirty.jpg',
            './tests/data/embedded_corrupted.docx',
        )
        with zipfile.ZipFile(archive, 'w') as zout:
            # The archive exists on disk from here on; remove it even if
            # adding a member or a later assertion fails.
            self.addCleanup(os.remove, archive)
            for member in members:
                zout.write(member)

        p, mimetype = parser_factory.get_parser(archive)
        self.assertEqual(mimetype, 'application/zip')
        meta = p.get_meta()
        self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
        self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
        self.assertFalse(p.remove_all())

    def test_html(self):
        """`HTMLParser` against several kinds of malformed markup."""
        target = self._scratch_copy('./tests/data/dirty.html', './tests/data/clean.html')
        cleaned = './tests/data/clean.cleaned.html'

        with open(target, 'a') as f:
            f.write('<open>but not</closed>')
        with self.assertRaises(ValueError):
            web.HTMLParser(target)

        # Yes, we're able to deal with malformed html :/
        shutil.copy('./tests/data/dirty.html', target)
        with open(target, 'a') as f:
            f.write('<meta name=\'this" is="weird"/>')
        p = web.HTMLParser(target)
        self.assertTrue(p.remove_all())
        try:
            p = web.HTMLParser(cleaned)
            self.assertEqual(p.get_meta(), {})
        finally:
            # Drop the cleaned output before the next phase, whether or not
            # the metadata check passed.
            os.remove(cleaned)

        # The remaining phases overwrite the scratch file in place.
        with open(target, 'w') as f:
            f.write('</close>')
        with self.assertRaises(ValueError):
            web.HTMLParser(target)

        # For these documents the parser can be built; reading the metadata
        # or cleaning is what raises.
        for markup in ('<notclosed>', '<doctitle><br/></doctitle><br/><notclosed>'):
            with open(target, 'w') as f:
                f.write(markup)
            p = web.HTMLParser(target)
            with self.assertRaises(ValueError):
                p.get_meta()
            p = web.HTMLParser(target)
            with self.assertRaises(ValueError):
                p.remove_all()

    def test_epub(self):
        """An epub whose `OEBPS/content.opf` is really a copy of `dirty.jpg`."""
        archive = './tests/data/clean.epub'
        with zipfile.ZipFile(archive, 'w') as zout:
            # The archive exists on disk from here on; remove it even if
            # adding the member or a later assertion fails.
            self.addCleanup(os.remove, archive)
            zout.write('./tests/data/dirty.jpg', 'OEBPS/content.opf')

        p, mimetype = parser_factory.get_parser(archive)
        self.assertEqual(mimetype, 'application/epub+zip')
        meta = p.get_meta()
        self.assertEqual(meta['OEBPS/content.opf']['OEBPS/content.opf'],
                         'harmful content')

        self.assertFalse(p.remove_all())
trivial modification of all shebang. `/usr/bin/python3` -> `/usr/bin/env python3` It's always better to trust the environment defined path to bin/python, as virtualenv become the way to go. 2018-09-11 15:54:53 +02:00			`#!/usr/bin/env python3`
Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00
			`import unittest`
			`import shutil`
			`import os`
Silence a bit the testsuite 2018-10-02 15:26:13 +02:00			`import logging`
Add support for zip files 2018-10-25 11:56:46 +02:00			`import zipfile`
Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00
Test mat2's reliability wrt. corrupted video files 2018-10-22 13:42:04 +02:00			`from libmat2 import pdf, images, audio, office, parser_factory, torrent`
Implement epub support 2019-02-21 01:28:11 +01:00			`from libmat2 import harmless, video, web`
Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00
Silence a bit the testsuite 2018-10-02 15:26:13 +02:00			`# No need to logging messages, should something go wrong,`
			`# the testsuite _will_ fail.`
			`logger = logging.getLogger()`
			`logger.setLevel(logging.FATAL)`

Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00
Add some tests about pathological files 2018-07-30 22:36:36 +02:00			`class TestInexistentFiles(unittest.TestCase):`
			`def test_ro(self):`
			`parser, mimetype = parser_factory.get_parser('/etc/passwd')`
			`self.assertEqual(mimetype, None)`
			`self.assertEqual(parser, None)`

			`def test_notaccessible(self):`
			`parser, mimetype = parser_factory.get_parser('/etc/shadow')`
			`self.assertEqual(mimetype, None)`
			`self.assertEqual(parser, None)`

			`def test_folder(self):`
			`parser, mimetype = parser_factory.get_parser('./tests/')`
			`self.assertEqual(mimetype, None)`
			`self.assertEqual(parser, None)`

			`def test_inexistingfile(self):`
			`parser, mimetype = parser_factory.get_parser('./tests/NONEXISTING_FILE')`
			`self.assertEqual(mimetype, None)`
			`self.assertEqual(parser, None)`

			`def test_chardevice(self):`
			`parser, mimetype = parser_factory.get_parser('/dev/zero')`
			`self.assertEqual(mimetype, None)`
			`self.assertEqual(parser, None)`

			`def test_brokensymlink(self):`
			`shutil.copy('./tests/test_libmat2.py', './tests/clean.py')`
			`os.symlink('./tests/clean.py', './tests/SYMLINK')`
			`os.remove('./tests/clean.py')`
			`parser, mimetype = parser_factory.get_parser('./tests/SYMLINK')`
			`self.assertEqual(mimetype, None)`
			`self.assertEqual(parser, None)`
			`os.unlink('./tests/SYMLINK')`

Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00			`class TestUnsupportedFiles(unittest.TestCase):`
			`def test_pdf(self):`
			`shutil.copy('./tests/test_libmat2.py', './tests/clean.py')`
			`parser, mimetype = parser_factory.get_parser('./tests/data/clean.py')`
			`self.assertEqual(mimetype, 'text/x-python')`
			`self.assertEqual(parser, None)`
			`os.remove('./tests/clean.py')`

Bump coverage for office files and fix some related crashes 2018-07-08 21:35:45 +02:00			`class TestCorruptedEmbedded(unittest.TestCase):`
			`def test_docx(self):`
			`shutil.copy('./tests/data/embedded_corrupted.docx', './tests/data/clean.docx')`
Fix some minor formatting issues 2018-09-24 19:50:24 +02:00			`parser, _ = parser_factory.get_parser('./tests/data/clean.docx')`
Bump coverage for office files and fix some related crashes 2018-07-08 21:35:45 +02:00			`self.assertFalse(parser.remove_all())`
			`self.assertIsNotNone(parser.get_meta())`
			`os.remove('./tests/data/clean.docx')`

			`def test_odt(self):`
			`shutil.copy('./tests/data/embedded_corrupted.odt', './tests/data/clean.odt')`
Fix some minor formatting issues 2018-09-24 19:50:24 +02:00			`parser, _ = parser_factory.get_parser('./tests/data/clean.odt')`
Bump coverage for office files and fix some related crashes 2018-07-08 21:35:45 +02:00			`self.assertFalse(parser.remove_all())`
Use of the archive refactoring for the office documents too 2019-02-07 21:58:10 +01:00			`self.assertTrue(parser.get_meta())`
Bump coverage for office files and fix some related crashes 2018-07-08 21:35:45 +02:00			`os.remove('./tests/data/clean.odt')`

Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00
			`class TestExplicitelyUnsupportedFiles(unittest.TestCase):`
			`def test_pdf(self):`
Add support for .txt files 2018-07-06 00:42:09 +02:00			`shutil.copy('./tests/test_libmat2.py', './tests/data/clean.py')`
			`parser, mimetype = parser_factory.get_parser('./tests/data/clean.py')`
			`self.assertEqual(mimetype, 'text/x-python')`
Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00			`self.assertEqual(parser, None)`
Add support for .txt files 2018-07-06 00:42:09 +02:00			`os.remove('./tests/data/clean.py')`
Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00

Use [Content_Types].xml to improve MS Office coverage 2018-10-01 22:26:35 +02:00			`class TestWrongContentTypesFileOffice(unittest.TestCase):`
			`def test_office_incomplete(self):`
Remove dangling references in MS Office's [Content_types].xml 2018-09-30 19:52:35 +02:00			`shutil.copy('./tests/data/malformed_content_types.docx', './tests/data/clean.docx')`
			`p = office.MSOfficeParser('./tests/data/clean.docx')`
			`self.assertIsNotNone(p)`
			`self.assertFalse(p.remove_all())`
			`os.remove('./tests/data/clean.docx')`

Use [Content_Types].xml to improve MS Office coverage 2018-10-01 22:26:35 +02:00			`def test_office_broken(self):`
			`shutil.copy('./tests/data/broken_xml_content_types.docx', './tests/data/clean.docx')`
			`with self.assertRaises(ValueError):`
			`office.MSOfficeParser('./tests/data/clean.docx')`
			`os.remove('./tests/data/clean.docx')`

			`def test_office_absent(self):`
			`shutil.copy('./tests/data/no_content_types.docx', './tests/data/clean.docx')`
			`with self.assertRaises(ValueError):`
			`office.MSOfficeParser('./tests/data/clean.docx')`
			`os.remove('./tests/data/clean.docx')`

Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00			`class TestCorruptedFiles(unittest.TestCase):`
			`def test_pdf(self):`
			`shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')`
			`with self.assertRaises(ValueError):`
			`pdf.PDFParser('./tests/data/clean.png')`
			`os.remove('./tests/data/clean.png')`

			`def test_png(self):`
			`shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')`
			`with self.assertRaises(ValueError):`
			`images.PNGParser('./tests/data/clean.pdf')`
			`os.remove('./tests/data/clean.pdf')`

			`def test_png2(self):`
			`shutil.copy('./tests/test_libmat2.py', './tests/clean.png')`
Fix some minor formatting issues 2018-09-24 19:50:24 +02:00			`parser, _ = parser_factory.get_parser('./tests/clean.png')`
Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00			`self.assertIsNone(parser)`
			`os.remove('./tests/clean.png')`

			`def test_torrent(self):`
			`shutil.copy('./tests/data/dirty.png', './tests/data/clean.torrent')`
Simplify how torrent files are handled - Rework the testsuite wrt. torrent - fail at parser's instantiation on corrupted torrent, instead of during `get_meta` or `remove_all` call 2018-07-08 13:47:00 +02:00			`with self.assertRaises(ValueError):`
			`torrent.TorrentParser('./tests/data/clean.torrent')`
Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00
			`with open("./tests/data/clean.torrent", "a") as f:`
			`f.write("trailing garbage")`
Simplify how torrent files are handled - Rework the testsuite wrt. torrent - fail at parser's instantiation on corrupted torrent, instead of during `get_meta` or `remove_all` call 2018-07-08 13:47:00 +02:00			`with self.assertRaises(ValueError):`
			`torrent.TorrentParser('./tests/data/clean.torrent')`

Bump coverage for torrent handling 2018-07-08 15:13:03 +02:00			`with open("./tests/data/clean.torrent", "w") as f:`
			`f.write("i-0e")`
			`with self.assertRaises(ValueError):`
			`torrent.TorrentParser('./tests/data/clean.torrent')`

			`with open("./tests/data/clean.torrent", "w") as f:`
			`f.write("i00e")`
			`with self.assertRaises(ValueError):`
			`torrent.TorrentParser('./tests/data/clean.torrent')`

			`with open("./tests/data/clean.torrent", "w") as f:`
Achieve 100% coverage! 2018-07-08 22:27:37 +02:00			`f.write("01:AAAAAAAAA")`
Bump coverage for torrent handling 2018-07-08 15:13:03 +02:00			`with self.assertRaises(ValueError):`
			`torrent.TorrentParser('./tests/data/clean.torrent')`

			`with open("./tests/data/clean.torrent", "w") as f:`
			`f.write("1:aaa")`
			`with self.assertRaises(ValueError):`
			`torrent.TorrentParser('./tests/data/clean.torrent')`

Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00			`os.remove('./tests/data/clean.torrent')`

			`def test_odg(self):`
			`shutil.copy('./tests/data/dirty.png', './tests/data/clean.odg')`
			`with self.assertRaises(ValueError):`
			`office.LibreOfficeParser('./tests/data/clean.odg')`
			`os.remove('./tests/data/clean.odg')`

			`def test_bmp(self):`
			`shutil.copy('./tests/data/dirty.png', './tests/data/clean.bmp')`
Fix some minor formatting issues 2018-09-24 19:50:24 +02:00			`ret = harmless.HarmlessParser('./tests/data/clean.bmp')`
			`self.assertIsNotNone(ret)`
Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00			`os.remove('./tests/data/clean.bmp')`

			`def test_docx(self):`
			`shutil.copy('./tests/data/dirty.png', './tests/data/clean.docx')`
			`with self.assertRaises(ValueError):`
Second pass of minor formatting 2018-09-24 20:15:07 +02:00			`office.MSOfficeParser('./tests/data/clean.docx')`
Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00			`os.remove('./tests/data/clean.docx')`

			`def test_flac(self):`
			`shutil.copy('./tests/data/dirty.png', './tests/data/clean.flac')`
			`with self.assertRaises(ValueError):`
Second pass of minor formatting 2018-09-24 20:15:07 +02:00			`audio.FLACParser('./tests/data/clean.flac')`
Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00			`os.remove('./tests/data/clean.flac')`

			`def test_mp3(self):`
			`shutil.copy('./tests/data/dirty.png', './tests/data/clean.mp3')`
			`with self.assertRaises(ValueError):`
Second pass of minor formatting 2018-09-24 20:15:07 +02:00			`audio.MP3Parser('./tests/data/clean.mp3')`
Split the testsuite a bit and add more tests 2018-06-22 21:16:55 +02:00			`os.remove('./tests/data/clean.mp3')`
Bump a bit the coverage 2018-07-07 18:02:53 +02:00
			`def test_jpg(self):`
			`shutil.copy('./tests/data/dirty.mp3', './tests/data/clean.jpg')`
			`with self.assertRaises(ValueError):`
			`images.JPGParser('./tests/data/clean.jpg')`
			`os.remove('./tests/data/clean.jpg')`
Test mat2's reliability wrt. corrupted video files 2018-10-22 13:42:04 +02:00
Implement lightweight cleaning for png and tiff 2018-10-23 16:14:21 +02:00			`def test_png_lightweight(self):`
			`return`
			`shutil.copy('./tests/data/dirty.torrent', './tests/data/clean.png')`
			`p = images.PNGParser('./tests/data/clean.png')`
			`self.assertTrue(p.remove_all())`
			`os.remove('./tests/data/clean.png')`

Test mat2's reliability wrt. corrupted video files 2018-10-22 13:42:04 +02:00			`def test_avi(self):`
Fix the testsuite on fedora 2018-10-22 13:55:09 +02:00			`try:`
			`video._get_ffmpeg_path()`
			`except RuntimeError:`
			`raise unittest.SkipTest`

Test mat2's reliability wrt. corrupted video files 2018-10-22 13:42:04 +02:00			`shutil.copy('./tests/data/dirty.torrent', './tests/data/clean.avi')`
			`p = video.AVIParser('./tests/data/clean.avi')`
			`self.assertFalse(p.remove_all())`
			`os.remove('./tests/data/clean.avi')`
Optimize the handling of problematic files 2018-10-22 19:12:39 +02:00
			`def test_avi_injection(self):`
			`try:`
			`video._get_ffmpeg_path()`
			`except RuntimeError:`
			`raise unittest.SkipTest`

			`shutil.copy('./tests/data/dirty.torrent', './tests/data/--output.avi')`
			`p = video.AVIParser('./tests/data/--output.avi')`
			`self.assertFalse(p.remove_all())`
			`os.remove('./tests/data/--output.avi')`
Add support for zip files 2018-10-25 11:56:46 +02:00
			`def test_zip(self):`
Implement epub support 2019-02-21 01:28:11 +01:00			`with zipfile.ZipFile('./tests/data/clean.zip', 'w') as zout:`
Add support for zip files 2018-10-25 11:56:46 +02:00			`zout.write('./tests/data/dirty.flac')`
			`zout.write('./tests/data/dirty.docx')`
			`zout.write('./tests/data/dirty.jpg')`
			`zout.write('./tests/data/embedded_corrupted.docx')`
Implement epub support 2019-02-21 01:28:11 +01:00			`p, mimetype = parser_factory.get_parser('./tests/data/clean.zip')`
Add support for zip files 2018-10-25 11:56:46 +02:00			`self.assertEqual(mimetype, 'application/zip')`
			`meta = p.get_meta()`
			`self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')`
			`self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')`
			`self.assertFalse(p.remove_all())`
Implement epub support 2019-02-21 01:28:11 +01:00			`os.remove('./tests/data/clean.zip')`
Add support for html files 2019-02-08 00:26:47 +01:00
			`def test_html(self):`
			`shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')`
			`with open('./tests/data/clean.html', 'a') as f:`
			`f.write('<open>but not</closed>')`
			`with self.assertRaises(ValueError):`
Implement epub support 2019-02-21 01:28:11 +01:00			`web.HTMLParser('./tests/data/clean.html')`
Add support for html files 2019-02-08 00:26:47 +01:00			`os.remove('./tests/data/clean.html')`

			`# Yes, we're able to deal with malformed html :/`
			`shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')`
			`with open('./tests/data/clean.html', 'a') as f:`
			`f.write('<meta name=\'this" is="weird"/>')`
Implement epub support 2019-02-21 01:28:11 +01:00			`p = web.HTMLParser('./tests/data/clean.html')`
Add support for html files 2019-02-08 00:26:47 +01:00			`self.assertTrue(p.remove_all())`
Implement epub support 2019-02-21 01:28:11 +01:00			`p = web.HTMLParser('./tests/data/clean.cleaned.html')`
Add support for html files 2019-02-08 00:26:47 +01:00			`self.assertEqual(p.get_meta(), {})`
			`os.remove('./tests/data/clean.html')`
			`os.remove('./tests/data/clean.cleaned.html')`

			`with open('./tests/data/clean.html', 'w') as f:`
			`f.write('</close>')`
			`with self.assertRaises(ValueError):`
Implement epub support 2019-02-21 01:28:11 +01:00			`web.HTMLParser('./tests/data/clean.html')`
Add support for html files 2019-02-08 00:26:47 +01:00			`os.remove('./tests/data/clean.html')`

			`with open('./tests/data/clean.html', 'w') as f:`
			`f.write('<notclosed>')`
Implement epub support 2019-02-21 01:28:11 +01:00			`p = web.HTMLParser('./tests/data/clean.html')`
Add support for html files 2019-02-08 00:26:47 +01:00			`with self.assertRaises(ValueError):`
			`p.get_meta()`
Implement epub support 2019-02-21 01:28:11 +01:00			`p = web.HTMLParser('./tests/data/clean.html')`
Add support for html files 2019-02-08 00:26:47 +01:00			`with self.assertRaises(ValueError):`
			`p.remove_all()`
			`os.remove('./tests/data/clean.html')`

Implement epub support 2019-02-21 01:28:11 +01:00			`with open('./tests/data/clean.html', 'w') as f:`
			`f.write('<doctitle><br/></doctitle><br/><notclosed>')`
			`p = web.HTMLParser('./tests/data/clean.html')`
			`with self.assertRaises(ValueError):`
			`p.get_meta()`
			`p = web.HTMLParser('./tests/data/clean.html')`
			`with self.assertRaises(ValueError):`
			`p.remove_all()`
			`os.remove('./tests/data/clean.html')`

			`def test_epub(self):`
			`with zipfile.ZipFile('./tests/data/clean.epub', 'w') as zout:`
			`zout.write('./tests/data/dirty.jpg', 'OEBPS/content.opf')`
			`p, mimetype = parser_factory.get_parser('./tests/data/clean.epub')`
			`self.assertEqual(mimetype, 'application/epub+zip')`
			`meta = p.get_meta()`
			`self.assertEqual(meta['OEBPS/content.opf']['OEBPS/content.opf'],`
			`'harmful content')`

			`self.assertFalse(p.remove_all())`
			`os.remove('./tests/data/clean.epub')`
Add support for html files 2019-02-08 00:26:47 +01:00