2018-03-13 01:01:07 +01:00
|
|
|
#!/usr/bin/python3
|
|
|
|
|
|
|
|
import unittest
|
|
|
|
import shutil
|
|
|
|
import os
|
|
|
|
|
|
|
|
from src import parsers
|
|
|
|
from src.parsers import pdf
|
|
|
|
|
|
|
|
class TestGetMeta(unittest.TestCase):
    """Verify the metadata read from the reference ``dirty.pdf`` fixture."""

    def test_pdf(self):
        """The dirty PDF must report its known producer and creator."""
        parser = pdf.PDFParser('./tests/data/dirty.pdf')
        metadata = parser.get_meta()

        # Checked in order; the first mismatch fails the test.
        expectations = (
            ('producer', 'pdfTeX-1.40.14'),
            ('creator', "'Certified by IEEE PDFeXpress at 03/19/2016 2:56:07 AM'"),
        )
        for key, expected in expectations:
            self.assertEqual(metadata[key], expected)
|
2018-03-13 01:01:07 +01:00
|
|
|
|
|
|
|
class TestCleaning(unittest.TestCase):
    """Verify that cleaning a PDF leaves only the expected metadata.

    Every test operates on a scratch copy of ``dirty.pdf`` so the reference
    fixture itself is never touched.
    """

    def setUp(self):
        """Copy the dirty fixture to the scratch path used by the tests."""
        shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf')

    def tearDown(self):
        """Delete the scratch copy and the cleaned output, if it exists."""
        # The test reads the cleaned result from 'clean.pdf.cleaned'; remove
        # it so no artifact is left in tests/data. It is absent when the
        # test failed before the cleaning step produced it.
        try:
            os.remove('./tests/data/clean.pdf.cleaned')
        except FileNotFoundError:
            pass
        os.remove('./tests/data/clean.pdf')

    def test_pdf(self):
        """Clean the scratch PDF and check the metadata that remains."""
        dirty_parser = pdf.PDFParser('./tests/data/clean.pdf')

        # Sanity check: the copy still carries metadata before cleaning.
        meta = dirty_parser.get_meta()
        self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')

        ret = dirty_parser.remove_all()
        self.assertTrue(ret)

        # Re-parse the cleaned output and compare against the exact
        # metadata expected to remain.
        cleaned_parser = pdf.PDFParser('./tests/data/clean.pdf.cleaned')
        expected_meta = {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1}
        self.assertEqual(cleaned_parser.get_meta(), expected_meta)
|