1
0
mirror of synced 2024-11-25 18:54:22 +01:00
mat2/tests/test_corrupted_files.py
jvoisin 55214206b5 Improve the previous commit
- More tests
- More documentation
- Minor code cleanup
2019-02-27 23:53:07 +01:00

294 lines
12 KiB
Python

#!/usr/bin/env python3
import unittest
import shutil
import os
import logging
import zipfile
from libmat2 import pdf, images, audio, office, parser_factory, torrent
from libmat2 import harmless, video, web
# No need to logging messages, should something go wrong,
# the testsuite _will_ fail.
logger = logging.getLogger()
logger.setLevel(logging.FATAL)
class TestInexistentFiles(unittest.TestCase):
def test_ro(self):
parser, mimetype = parser_factory.get_parser('/etc/passwd')
self.assertEqual(mimetype, None)
self.assertEqual(parser, None)
def test_notaccessible(self):
parser, mimetype = parser_factory.get_parser('/etc/shadow')
self.assertEqual(mimetype, None)
self.assertEqual(parser, None)
def test_folder(self):
parser, mimetype = parser_factory.get_parser('./tests/')
self.assertEqual(mimetype, None)
self.assertEqual(parser, None)
def test_inexistingfile(self):
parser, mimetype = parser_factory.get_parser('./tests/NONEXISTING_FILE')
self.assertEqual(mimetype, None)
self.assertEqual(parser, None)
def test_chardevice(self):
parser, mimetype = parser_factory.get_parser('/dev/zero')
self.assertEqual(mimetype, None)
self.assertEqual(parser, None)
def test_brokensymlink(self):
shutil.copy('./tests/test_libmat2.py', './tests/clean.py')
os.symlink('./tests/clean.py', './tests/SYMLINK')
os.remove('./tests/clean.py')
parser, mimetype = parser_factory.get_parser('./tests/SYMLINK')
self.assertEqual(mimetype, None)
self.assertEqual(parser, None)
os.unlink('./tests/SYMLINK')
class TestUnsupportedFiles(unittest.TestCase):
def test_pdf(self):
shutil.copy('./tests/test_libmat2.py', './tests/clean.py')
parser, mimetype = parser_factory.get_parser('./tests/data/clean.py')
self.assertEqual(mimetype, 'text/x-python')
self.assertEqual(parser, None)
os.remove('./tests/clean.py')
class TestCorruptedEmbedded(unittest.TestCase):
def test_docx(self):
shutil.copy('./tests/data/embedded_corrupted.docx', './tests/data/clean.docx')
parser, _ = parser_factory.get_parser('./tests/data/clean.docx')
self.assertFalse(parser.remove_all())
self.assertIsNotNone(parser.get_meta())
os.remove('./tests/data/clean.docx')
def test_odt(self):
shutil.copy('./tests/data/embedded_corrupted.odt', './tests/data/clean.odt')
parser, _ = parser_factory.get_parser('./tests/data/clean.odt')
self.assertFalse(parser.remove_all())
self.assertTrue(parser.get_meta())
os.remove('./tests/data/clean.odt')
class TestExplicitelyUnsupportedFiles(unittest.TestCase):
def test_pdf(self):
shutil.copy('./tests/test_libmat2.py', './tests/data/clean.py')
parser, mimetype = parser_factory.get_parser('./tests/data/clean.py')
self.assertEqual(mimetype, 'text/x-python')
self.assertEqual(parser, None)
os.remove('./tests/data/clean.py')
class TestWrongContentTypesFileOffice(unittest.TestCase):
def test_office_incomplete(self):
shutil.copy('./tests/data/malformed_content_types.docx', './tests/data/clean.docx')
p = office.MSOfficeParser('./tests/data/clean.docx')
self.assertIsNotNone(p)
self.assertFalse(p.remove_all())
os.remove('./tests/data/clean.docx')
def test_office_broken(self):
shutil.copy('./tests/data/broken_xml_content_types.docx', './tests/data/clean.docx')
with self.assertRaises(ValueError):
office.MSOfficeParser('./tests/data/clean.docx')
os.remove('./tests/data/clean.docx')
def test_office_absent(self):
shutil.copy('./tests/data/no_content_types.docx', './tests/data/clean.docx')
with self.assertRaises(ValueError):
office.MSOfficeParser('./tests/data/clean.docx')
os.remove('./tests/data/clean.docx')
class TestCorruptedFiles(unittest.TestCase):
def test_pdf(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
with self.assertRaises(ValueError):
pdf.PDFParser('./tests/data/clean.png')
os.remove('./tests/data/clean.png')
def test_png(self):
shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')
with self.assertRaises(ValueError):
images.PNGParser('./tests/data/clean.pdf')
os.remove('./tests/data/clean.pdf')
def test_png2(self):
shutil.copy('./tests/test_libmat2.py', './tests/clean.png')
parser, _ = parser_factory.get_parser('./tests/clean.png')
self.assertIsNone(parser)
os.remove('./tests/clean.png')
def test_torrent(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.torrent')
with self.assertRaises(ValueError):
torrent.TorrentParser('./tests/data/clean.torrent')
with open("./tests/data/clean.torrent", "a") as f:
f.write("trailing garbage")
with self.assertRaises(ValueError):
torrent.TorrentParser('./tests/data/clean.torrent')
with open("./tests/data/clean.torrent", "w") as f:
f.write("i-0e")
with self.assertRaises(ValueError):
torrent.TorrentParser('./tests/data/clean.torrent')
with open("./tests/data/clean.torrent", "w") as f:
f.write("i00e")
with self.assertRaises(ValueError):
torrent.TorrentParser('./tests/data/clean.torrent')
with open("./tests/data/clean.torrent", "w") as f:
f.write("01:AAAAAAAAA")
with self.assertRaises(ValueError):
torrent.TorrentParser('./tests/data/clean.torrent')
with open("./tests/data/clean.torrent", "w") as f:
f.write("1:aaa")
with self.assertRaises(ValueError):
torrent.TorrentParser('./tests/data/clean.torrent')
os.remove('./tests/data/clean.torrent')
def test_odg(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.odg')
with self.assertRaises(ValueError):
office.LibreOfficeParser('./tests/data/clean.odg')
os.remove('./tests/data/clean.odg')
def test_bmp(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.bmp')
ret = harmless.HarmlessParser('./tests/data/clean.bmp')
self.assertIsNotNone(ret)
os.remove('./tests/data/clean.bmp')
def test_docx(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.docx')
with self.assertRaises(ValueError):
office.MSOfficeParser('./tests/data/clean.docx')
os.remove('./tests/data/clean.docx')
def test_flac(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.flac')
with self.assertRaises(ValueError):
audio.FLACParser('./tests/data/clean.flac')
os.remove('./tests/data/clean.flac')
def test_mp3(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.mp3')
with self.assertRaises(ValueError):
audio.MP3Parser('./tests/data/clean.mp3')
os.remove('./tests/data/clean.mp3')
def test_jpg(self):
shutil.copy('./tests/data/dirty.mp3', './tests/data/clean.jpg')
with self.assertRaises(ValueError):
images.JPGParser('./tests/data/clean.jpg')
os.remove('./tests/data/clean.jpg')
def test_png_lightweight(self):
return
shutil.copy('./tests/data/dirty.torrent', './tests/data/clean.png')
p = images.PNGParser('./tests/data/clean.png')
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.png')
def test_avi(self):
try:
video._get_ffmpeg_path()
except RuntimeError:
raise unittest.SkipTest
shutil.copy('./tests/data/dirty.torrent', './tests/data/clean.avi')
p = video.AVIParser('./tests/data/clean.avi')
self.assertFalse(p.remove_all())
os.remove('./tests/data/clean.avi')
def test_avi_injection(self):
try:
video._get_ffmpeg_path()
except RuntimeError:
raise unittest.SkipTest
shutil.copy('./tests/data/dirty.torrent', './tests/data/--output.avi')
p = video.AVIParser('./tests/data/--output.avi')
self.assertFalse(p.remove_all())
os.remove('./tests/data/--output.avi')
def test_zip(self):
with zipfile.ZipFile('./tests/data/clean.zip', 'w') as zout:
zout.write('./tests/data/dirty.flac')
zout.write('./tests/data/dirty.docx')
zout.write('./tests/data/dirty.jpg')
zout.write('./tests/data/embedded_corrupted.docx')
p, mimetype = parser_factory.get_parser('./tests/data/clean.zip')
self.assertEqual(mimetype, 'application/zip')
meta = p.get_meta()
self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
self.assertFalse(p.remove_all())
os.remove('./tests/data/clean.zip')
def test_html(self):
shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')
with open('./tests/data/clean.html', 'a') as f:
f.write('<open>but not</closed>')
with self.assertRaises(ValueError):
web.HTMLParser('./tests/data/clean.html')
os.remove('./tests/data/clean.html')
# Yes, we're able to deal with malformed html :/
shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')
with open('./tests/data/clean.html', 'a') as f:
f.write('<meta name=\'this" is="weird"/>')
p = web.HTMLParser('./tests/data/clean.html')
self.assertTrue(p.remove_all())
p = web.HTMLParser('./tests/data/clean.cleaned.html')
self.assertEqual(p.get_meta(), {})
os.remove('./tests/data/clean.html')
os.remove('./tests/data/clean.cleaned.html')
with open('./tests/data/clean.html', 'w') as f:
f.write('</meta>')
with self.assertRaises(ValueError):
web.HTMLParser('./tests/data/clean.html')
os.remove('./tests/data/clean.html')
with open('./tests/data/clean.html', 'w') as f:
f.write('<meta><a>test</a><set/></meta><title></title><meta>')
p = web.HTMLParser('./tests/data/clean.html')
with self.assertRaises(ValueError):
p.get_meta()
p = web.HTMLParser('./tests/data/clean.html')
with self.assertRaises(ValueError):
p.remove_all()
os.remove('./tests/data/clean.html')
with open('./tests/data/clean.html', 'w') as f:
f.write('<doctitle><br/></doctitle><br/><notclosed>')
p = web.HTMLParser('./tests/data/clean.html')
with self.assertRaises(ValueError):
p.get_meta()
p = web.HTMLParser('./tests/data/clean.html')
with self.assertRaises(ValueError):
p.remove_all()
os.remove('./tests/data/clean.html')
def test_epub(self):
with zipfile.ZipFile('./tests/data/clean.epub', 'w') as zout:
zout.write('./tests/data/dirty.jpg', 'OEBPS/content.opf')
p, mimetype = parser_factory.get_parser('./tests/data/clean.epub')
self.assertEqual(mimetype, 'application/epub+zip')
meta = p.get_meta()
self.assertEqual(meta['OEBPS/content.opf']['OEBPS/content.opf'],
'harmful content')
self.assertFalse(p.remove_all())
os.remove('./tests/data/clean.epub')