2018-03-13 01:01:07 +01:00
|
|
|
#!/usr/bin/python3
|
|
|
|
|
|
|
|
import unittest
|
|
|
|
import shutil
|
|
|
|
import os
|
|
|
|
|
2018-03-31 15:46:17 +02:00
|
|
|
from src import pdf, png, jpg, audio, office
|
2018-03-13 01:01:07 +01:00
|
|
|
|
|
|
|
class TestGetMeta(unittest.TestCase):
    """Check what each parser's get_meta() reports for the dirty fixtures.

    Every test builds a parser on a file under ./tests/data and compares
    selected entries of the returned metadata against known values.
    """

    def test_pdf(self):
        p = pdf.PDFParser('./tests/data/dirty.pdf')
        meta = p.get_meta()
        self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
        # The expected creator value really does carry its own single quotes.
        self.assertEqual(
            meta['creator'],
            "'Certified by IEEE PDFeXpress at 03/19/2016 2:56:07 AM'",
        )

    def test_png(self):
        p = png.PNGParser('./tests/data/dirty.png')
        meta = p.get_meta()
        self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
        self.assertEqual(meta['ModifyDate'], "2018:03:20 21:59:25")

    def test_jpg(self):
        p = jpg.JPGParser('./tests/data/dirty.jpg')
        meta = p.get_meta()
        self.assertEqual(meta['Comment'], 'Created with GIMP')

    def test_mp3(self):
        p = audio.MP3Parser('./tests/data/dirty.mp3')
        meta = p.get_meta()
        # The trailing space inside the key is intentional: it is the key
        # exactly as this test has always looked it up.
        self.assertEqual(meta['TXXX:I am a '], ['various comment'])

    def test_ogg(self):
        p = audio.OGGParser('./tests/data/dirty.ogg')
        meta = p.get_meta()
        self.assertEqual(meta['TITLE'], ['I am so'])

    def test_flac(self):
        p = audio.FLACParser('./tests/data/dirty.flac')
        meta = p.get_meta()
        self.assertEqual(meta['TITLE'], ['I am so'])

    def test_docx(self):
        p = office.OfficeParser('./tests/data/dirty.docx')
        meta = p.get_meta()
        # This used to be a bare print(meta), which asserted nothing. Apply
        # the same minimal check TestCleaning.test_office uses on this file.
        # TODO(review): pin concrete keys/values once the expected docx
        # metadata is settled.
        self.assertIsNotNone(meta)
|
|
|
|
|
2018-03-25 16:17:41 +02:00
|
|
|
|
2018-03-20 23:35:02 +01:00
|
|
|
class TestCleaning(unittest.TestCase):
    """Check that remove_all() yields a cleaned file with the expected metadata.

    Each test works on a throw-away copy of a dirty fixture
    (./tests/data/clean.<ext>), confirms the copy still carries metadata,
    calls remove_all(), then re-parses ./tests/data/clean.<ext>.cleaned and
    compares its metadata with the expected post-cleaning value.
    """

    @staticmethod
    def _remove_if_present(path):
        """Delete *path*, ignoring the case where it does not exist."""
        try:
            os.remove(path)
        except FileNotFoundError:
            # Deliberate best effort: a test that failed early may never
            # have produced this file.
            pass

    def _working_copy(self, extension):
        """Copy dirty.<extension> to clean.<extension> and return the copy's path.

        The copy and its '.cleaned' counterpart are registered with
        addCleanup, so they are removed whether the test passes or fails.
        """
        dirty = './tests/data/dirty.{}'.format(extension)
        clean = './tests/data/clean.{}'.format(extension)
        # Register before copying so a partially written copy is removed too.
        self.addCleanup(self._remove_if_present, clean)
        self.addCleanup(self._remove_if_present, clean + '.cleaned')
        shutil.copy(dirty, clean)
        return clean

    def test_pdf(self):
        clean = self._working_copy('pdf')
        p = pdf.PDFParser(clean)

        meta = p.get_meta()
        self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')

        ret = p.remove_all()
        self.assertTrue(ret)

        p = pdf.PDFParser(clean + '.cleaned')
        # Unlike the other formats, the cleaned PDF is expected to still
        # report these three entries rather than an empty dict.
        expected_meta = {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1}
        self.assertEqual(p.get_meta(), expected_meta)

    def test_png(self):
        clean = self._working_copy('png')
        p = png.PNGParser(clean)

        meta = p.get_meta()
        self.assertEqual(meta['Comment'], 'This is a comment, be careful!')

        ret = p.remove_all()
        self.assertTrue(ret)

        p = png.PNGParser(clean + '.cleaned')
        self.assertEqual(p.get_meta(), {})

    def test_jpg(self):
        clean = self._working_copy('jpg')
        p = jpg.JPGParser(clean)

        meta = p.get_meta()
        self.assertEqual(meta['Comment'], 'Created with GIMP')

        ret = p.remove_all()
        self.assertTrue(ret)

        p = jpg.JPGParser(clean + '.cleaned')
        self.assertEqual(p.get_meta(), {})

    def test_mp3(self):
        clean = self._working_copy('mp3')
        p = audio.MP3Parser(clean)

        meta = p.get_meta()
        self.assertEqual(meta['TXXX:I am a '], ['various comment'])

        ret = p.remove_all()
        self.assertTrue(ret)

        p = audio.MP3Parser(clean + '.cleaned')
        self.assertEqual(p.get_meta(), {})

    def test_ogg(self):
        clean = self._working_copy('ogg')
        p = audio.OGGParser(clean)

        meta = p.get_meta()
        self.assertEqual(meta['TITLE'], ['I am so'])

        ret = p.remove_all()
        self.assertTrue(ret)

        p = audio.OGGParser(clean + '.cleaned')
        self.assertEqual(p.get_meta(), {})

    def test_flac(self):
        clean = self._working_copy('flac')
        p = audio.FLACParser(clean)

        meta = p.get_meta()
        self.assertEqual(meta['TITLE'], ['I am so'])

        ret = p.remove_all()
        self.assertTrue(ret)

        p = audio.FLACParser(clean + '.cleaned')
        self.assertEqual(p.get_meta(), {})

    def test_office(self):
        clean = self._working_copy('docx')
        p = office.OfficeParser(clean)

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        ret = p.remove_all()
        self.assertTrue(ret)

        p = office.OfficeParser(clean + '.cleaned')
        self.assertEqual(p.get_meta(), {})
|