1
0
Fork 0
mat2/tests/test_libmat2.py

243 lines
7.7 KiB
Python
Raw Normal View History

2018-03-13 01:01:07 +01:00
#!/usr/bin/python3
import unittest
import shutil
import os
import zipfile
import tempfile
2018-03-13 01:01:07 +01:00
from src import pdf, png, jpg, audio, office, libreoffice, parser_factory
2018-03-13 01:01:07 +01:00
class TestGetMeta(unittest.TestCase):
    """Check that each parser reads the known metadata of a dirty sample.

    Every test builds the parser for one fixture under ``./tests/data`` and
    compares selected entries of ``get_meta()`` with the values the fixture
    is known to carry.
    """

    def _assert_fields(self, parser, expected):
        """Assert that `parser.get_meta()` holds every expected entry.

        `expected` is a sequence of ``(key, value)`` pairs; the pairs are
        checked in the given order against a single ``get_meta()`` result.
        """
        metadata = parser.get_meta()
        for key, value in expected:
            self.assertEqual(metadata[key], value)

    def test_pdf(self):
        """PDF: producer and creator fields are reported."""
        self._assert_fields(
            pdf.PDFParser('./tests/data/dirty.pdf'),
            (
                ('producer', 'pdfTeX-1.40.14'),
                ('creator', "'Certified by IEEE PDFeXpress at 03/19/2016 2:56:07 AM'"),
            ),
        )

    def test_png(self):
        """PNG: comment and modification date are reported."""
        self._assert_fields(
            png.PNGParser('./tests/data/dirty.png'),
            (
                ('Comment', 'This is a comment, be careful!'),
                ('ModifyDate', "2018:03:20 21:59:25"),
            ),
        )

    def test_jpg(self):
        """JPG: the comment is reported."""
        self._assert_fields(
            jpg.JPGParser('./tests/data/dirty.jpg'),
            (('Comment', 'Created with GIMP'),),
        )

    def test_mp3(self):
        """MP3: the user-defined text frame is reported."""
        self._assert_fields(
            audio.MP3Parser('./tests/data/dirty.mp3'),
            (('TXXX:I am a ', ['various comment']),),
        )

    def test_ogg(self):
        """OGG: the title tag is reported."""
        self._assert_fields(
            audio.OGGParser('./tests/data/dirty.ogg'),
            (('TITLE', ['I am so']),),
        )

    def test_flac(self):
        """FLAC: the title tag is reported."""
        self._assert_fields(
            audio.FLACParser('./tests/data/dirty.flac'),
            (('TITLE', ['I am so']),),
        )

    def test_docx(self):
        """DOCX: author fields and the generating application are reported."""
        self._assert_fields(
            office.OfficeParser('./tests/data/dirty.docx'),
            (
                ('cp:lastModifiedBy', 'Julien Voisin'),
                ('dc:creator', 'julien voisin'),
                ('Application', 'LibreOffice/5.4.5.1$Linux_X86_64 LibreOffice_project/40m0$Build-1'),
            ),
        )

    def test_libreoffice(self):
        """ODT: initial creator, creation date and generator are reported."""
        self._assert_fields(
            libreoffice.LibreOfficeParser('./tests/data/dirty.odt'),
            (
                ('meta:initial-creator', 'jvoisin '),
                ('meta:creation-date', '2011-07-26T03:27:48'),
                ('meta:generator', 'LibreOffice/3.3$Unix LibreOffice_project/330m19$Build-202'),
            ),
        )
class TestDeepCleaning(unittest.TestCase):
    """Check that cleaned zip-based documents are clean at every level.

    After ``remove_all()`` the produced ``.cleaned`` archive must expose no
    metadata through its own parser, carry neutral zip member headers, and
    contain no embedded file that still reports metadata.
    """

    @staticmethod
    def _remove_if_exists(path):
        """Delete `path`; a file that was never created is not an error."""
        try:
            os.remove(path)
        except FileNotFoundError:
            # Cleanup runs even when the test failed before producing the
            # file, so its absence is expected in that case.
            pass

    def __check_deep_meta(self, p):
        """Assert that every parsable file inside archive `p` has no metadata.

        The archive at ``p.filename`` is extracted into a temporary directory
        and each extracted file is handed to ``parser_factory.get_parser``;
        files for which no parser is returned are skipped.
        """
        # TemporaryDirectory removes the extraction directory even when an
        # assertion below fails; the archive handle is closed right after
        # extraction.
        with tempfile.TemporaryDirectory() as tempdir:
            with zipfile.ZipFile(p.filename) as zipin:
                zipin.extractall(tempdir)
            for subdir, _, files in os.walk(tempdir):
                for name in files:
                    complete_path = os.path.join(subdir, name)
                    inside_p = parser_factory.get_parser(complete_path)
                    if inside_p is None:
                        continue
                    self.assertEqual(inside_p.get_meta(), {})
                    # Only report the file as clean once the check passed.
                    print('[+] %s is clean inside %s' % (complete_path, p.filename))

    def __check_zip_meta(self, p):
        """Assert that every member header of archive `p` is neutral.

        Each member must have an empty comment, the fixed timestamp
        1980-01-01 00:00:00, and a creating system of 3.
        """
        with zipfile.ZipFile(p.filename) as zipin:
            for item in zipin.infolist():
                self.assertEqual(item.comment, b'')
                self.assertEqual(item.date_time, (1980, 1, 1, 0, 0, 0))
                self.assertEqual(item.create_system, 3)  # 3 is UNIX

    def __clean_and_check(self, parser_class, extension):
        """Clean a copy of the dirty sample and verify the result in depth.

        `parser_class` is the parser to exercise and `extension` selects the
        ``./tests/data/dirty.<extension>`` fixture to copy.
        """
        working_copy = './tests/data/clean.' + extension
        cleaned = working_copy + '.cleaned'
        # Register cleanup first so both artifacts are removed even when an
        # assertion fails midway.
        self.addCleanup(self._remove_if_exists, working_copy)
        self.addCleanup(self._remove_if_exists, cleaned)
        shutil.copy('./tests/data/dirty.' + extension, working_copy)

        p = parser_class(working_copy)
        self.assertIsNotNone(p.get_meta())
        self.assertTrue(p.remove_all())

        p = parser_class(cleaned)
        self.assertEqual(p.get_meta(), {})
        self.__check_zip_meta(p)
        self.__check_deep_meta(p)

    def test_office(self):
        """A cleaned .docx is clean in its zip headers and embedded files."""
        self.__clean_and_check(office.OfficeParser, 'docx')

    def test_libreoffice(self):
        """A cleaned .odt is clean in its zip headers and embedded files."""
        self.__clean_and_check(libreoffice.LibreOfficeParser, 'odt')
2018-03-25 16:17:41 +02:00
2018-03-20 23:35:02 +01:00
class TestCleaning(unittest.TestCase):
    """Check that ``remove_all()`` strips the metadata of each file format.

    Each test copies a dirty fixture, confirms the copy still carries
    metadata, cleans it, and compares the metadata of the produced
    ``.cleaned`` file with the expected residue (empty for most formats).
    """

    @staticmethod
    def _remove_if_exists(path):
        """Delete `path`; a file that was never created is not an error."""
        try:
            os.remove(path)
        except FileNotFoundError:
            # Cleanup runs even when the test failed before producing the
            # file, so its absence is expected in that case.
            pass

    def _check_cleaning(self, parser_class, extension, dirty_meta=None,
                        clean_meta=None):
        """Clean a copy of the dirty sample and verify the outcome.

        parser_class: parser to instantiate on the copy and on the result.
        extension: selects the ``./tests/data/dirty.<extension>`` fixture.
        dirty_meta: optional sequence of ``(key, value)`` pairs that must be
            present in the metadata before cleaning.
        clean_meta: metadata expected after cleaning; defaults to ``{}``.
        """
        working_copy = './tests/data/clean.' + extension
        cleaned = working_copy + '.cleaned'
        # Register cleanup first so neither the working copy nor the cleaned
        # output is left behind when an assertion fails.
        self.addCleanup(self._remove_if_exists, working_copy)
        self.addCleanup(self._remove_if_exists, cleaned)
        shutil.copy('./tests/data/dirty.' + extension, working_copy)

        p = parser_class(working_copy)
        meta = p.get_meta()
        self.assertIsNotNone(meta)
        for key, value in (dirty_meta or ()):
            self.assertEqual(meta[key], value)

        self.assertTrue(p.remove_all())

        expected = {} if clean_meta is None else clean_meta
        self.assertEqual(parser_class(cleaned).get_meta(), expected)

    def test_pdf(self):
        """PDF: only format and placeholder dates remain after cleaning."""
        self._check_cleaning(
            pdf.PDFParser, 'pdf',
            dirty_meta=(('producer', 'pdfTeX-1.40.14'),),
            clean_meta={'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1},
        )

    def test_png(self):
        """PNG: no metadata remains after cleaning."""
        self._check_cleaning(
            png.PNGParser, 'png',
            dirty_meta=(('Comment', 'This is a comment, be careful!'),),
        )

    def test_jpg(self):
        """JPG: no metadata remains after cleaning."""
        self._check_cleaning(
            jpg.JPGParser, 'jpg',
            dirty_meta=(('Comment', 'Created with GIMP'),),
        )

    def test_mp3(self):
        """MP3: no metadata remains after cleaning."""
        self._check_cleaning(
            audio.MP3Parser, 'mp3',
            dirty_meta=(('TXXX:I am a ', ['various comment']),),
        )

    def test_ogg(self):
        """OGG: no metadata remains after cleaning."""
        self._check_cleaning(
            audio.OGGParser, 'ogg',
            dirty_meta=(('TITLE', ['I am so']),),
        )

    def test_flac(self):
        """FLAC: no metadata remains after cleaning."""
        self._check_cleaning(
            audio.FLACParser, 'flac',
            dirty_meta=(('TITLE', ['I am so']),),
        )

    def test_office(self):
        """DOCX: no metadata remains after cleaning."""
        self._check_cleaning(office.OfficeParser, 'docx')

    def test_libreoffice(self):
        """ODT: no metadata remains after cleaning."""
        self._check_cleaning(libreoffice.LibreOfficeParser, 'odt')