1
0
mirror of synced 2024-11-27 19:54:23 +01:00

Refactor tests

This commit is contained in:
jvoisin 2019-10-12 13:32:04 -07:00
parent 6d19a20935
commit 3cef7fe7fc
5 changed files with 313 additions and 538 deletions

View File

@ -29,8 +29,7 @@ class ExiftoolParser(abstract.AbstractParser):
def _lightweight_cleanup(self) -> bool: def _lightweight_cleanup(self) -> bool:
if os.path.exists(self.output_filename): if os.path.exists(self.output_filename):
try: try: # exiftool can't force output to existing files
# exiftool can't force output to existing files
os.remove(self.output_filename) os.remove(self.output_filename)
except OSError as e: # pragma: no cover except OSError as e: # pragma: no cover
logging.error("The output file %s is already existing and \ logging.error("The output file %s is already existing and \

View File

@ -120,7 +120,6 @@ class TestGetMeta(unittest.TestCase):
self.assertEqual(meta['4'], '# And an other one') self.assertEqual(meta['4'], '# And an other one')
self.assertEqual(meta['6'], '# and a final one here') self.assertEqual(meta['6'], '# and a final one here')
def test_tiff(self): def test_tiff(self):
p = images.TiffParser('./tests/data/dirty.tiff') p = images.TiffParser('./tests/data/dirty.tiff')
meta = p.get_meta() meta = p.get_meta()
@ -222,6 +221,10 @@ class TestGetMeta(unittest.TestCase):
self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
os.remove('./tests/data/dirty.tar') os.remove('./tests/data/dirty.tar')
def test_svg(self):
    """Check the ``Xmlns`` metadata reported for the ``weird.svg`` sample."""
    metadata = images.SVGParser('./tests/data/weird.svg').get_meta()
    self.assertEqual(metadata['Xmlns'], 'http://www.w3.org/1337/svg')
class TestRemovingThumbnails(unittest.TestCase): class TestRemovingThumbnails(unittest.TestCase):
def test_odt(self): def test_odt(self):
@ -281,367 +284,215 @@ class TestRevisionsCleaning(unittest.TestCase):
os.remove('./tests/data/revision_clean.docx') os.remove('./tests/data/revision_clean.docx')
os.remove('./tests/data/revision_clean.cleaned.docx') os.remove('./tests/data/revision_clean.cleaned.docx')
class TestCleaning(unittest.TestCase): class TestCleaning(unittest.TestCase):
def test_pdf(self): data = [{
shutil.copy('./tests/data/dirty.pdf', './tests/data/clean.pdf') 'name': 'pdf',
p = pdf.PDFParser('./tests/data/clean.pdf') 'parser': pdf.PDFParser,
'meta': {'producer': 'pdfTeX-1.40.14'},
'expected_meta': {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1},
}, {
'name': 'png',
'parser': images.PNGParser,
'meta': {'Comment': 'This is a comment, be careful!'},
'expected_meta': {},
}, {
'name': 'jpg',
'parser': images.JPGParser,
'meta': {'Comment': 'Created with GIMP'},
'expected_meta': {},
}, {
'name': 'mp3',
'parser': audio.MP3Parser,
'meta': {'TXXX:I am a': 'various comment'},
'expected_meta': {},
}, {
'name': 'ogg',
'parser': audio.OGGParser,
'meta': {'title': 'I am so'},
'expected_meta': {},
}, {
'name': 'flac',
'parser': audio.FLACParser,
'meta': {'title': 'I am so'},
'expected_meta': {},
}, {
'name': 'docx',
'parser': office.MSOfficeParser,
'meta': {'word/media/image1.png' :
{'Comment': 'This is a comment, be careful!',
'ModifyDate': '2018:03:20 21:59:25',
'PixelUnits': 'meters',
'PixelsPerUnitX': 2835,
'PixelsPerUnitY': 2835,
'create_system': 'Weird',
'date_time': '2018-03-31 13:15:38'} ,
},
'expected_meta': {},
}, {
'name': 'odt',
'parser': office.LibreOfficeParser,
'meta': {
'Pictures/1000000000000032000000311EC5314D.png': {
'create_system': 'Weird',
'date_time': '2011-07-26 02:40:16',
'PixelsPerUnitX': 4847,
'PixelsPerUnitY': 4760,
'PixelUnits': 'meters',
},
},
'expected_meta': {},
},{
'name': 'tiff',
'parser': images.TiffParser,
'meta': {'Model': 'C7070WZ'},
'expected_meta':
{'Orientation': 'Horizontal (normal)',
'ResolutionUnit': 'inches',
'XResolution': 72,
'YResolution': 72}
},{
'name': 'bmp',
'parser': harmless.HarmlessParser,
'meta': {},
'expected_meta': {},
},{
'name': 'torrent',
'parser': torrent.TorrentParser,
'meta': {'created by': b'mktorrent 1.0', 'creation date': 1522397702},
'expected_meta': {},
}, {
'name': 'odf',
'parser': office.LibreOfficeParser,
'meta': {'meta.xml': {'create_system': 'Weird', 'date_time':
'2018-04-22 22:20:24', 'meta:initial-creator': 'Julien Voisin',
'meta:creation-date': '2018-04-23T00:18:59.438231281',
'dc:date': '2018-04-23T00:20:23.978564933', 'dc:creator':
'Julien Voisin', 'meta:editing-duration': 'PT1M24S',
'meta:editing-cycles': '1', 'meta:generator':
'LibreOffice/5.4.6.2$Linux_X86_64 LibreOffice_project/40m0$Build-2'}},
'expected_meta': {},
}, {
'name': 'odg',
'parser': office.LibreOfficeParser,
'meta': {'meta.xml': {'create_system': 'Weird', 'date_time':
'2018-04-22 22:26:58', 'meta:initial-creator': 'Julien Voisin',
'meta:creation-date': '2018-04-23T00:25:59.953271949',
'dc:date': '2018-04-23T00:26:59.385838550', 'dc:creator':
'Julien Voisin', 'meta:editing-duration': 'PT59S',
'meta:editing-cycles': '1', 'meta:generator':
'LibreOffice/5.4.6.2$Linux_X86_64 LibreOffice_project/40m0$Build-2'}},
'expected_meta': {},
}, {
'name': 'txt',
'parser': harmless.HarmlessParser,
'meta': {},
'expected_meta': {},
},{
'name': 'gif',
'parser': images.GIFParser,
'meta': {'Comment': 'this is a test comment'},
'expected_meta': {},
},{
'name': 'css',
'parser': web.CSSParser,
'meta': {
'harmful data': 'underline is cool',
'version': '1.0',
'author': 'jvoisin'
},
'expected_meta': {},
},{
'name': 'svg',
'parser': images.SVGParser,
'meta': {
'WorkDescription': "This is a test svg image for mat2's testsuite",
},
'expected_meta': {},
} ,{
'name': 'ppm',
'parser': images.PPMParser,
'meta': {
'1': '# A metadata',
},
'expected_meta': {},
} ,{
'name': 'avi',
'ffmpeg': 1,
'parser': video.AVIParser,
'meta': {
'Software': 'MEncoder SVN-r33148-4.0.1',
},
'expected_meta': {},
} ,{
'name': 'mp4',
'ffmpeg': 1,
'parser': video.MP4Parser,
'meta': {
'Encoder': 'HandBrake 0.9.4 2009112300',
},
'expected_meta':
{'CompatibleBrands': ['isom', 'iso2', 'avc1', 'mp41'],
'CompressorID': 'avc1',
'GraphicsMode': 'srcCopy',
'HandlerDescription': 'SoundHandler',
'HandlerType': 'Metadata',
'HandlerVendorID': 'Apple',
'MajorBrand': 'MP4 Base Media v1 [IS0 14496-12:2003]',
'MediaHeaderVersion': 0,
'MinorVersion': '0.2.0',
'MovieDataOffset': 48,
'MovieHeaderVersion': 0,
'NextTrackID': 3,
'PreferredRate': 1,
'Rotation': 0,
'TimeScale': 1000,
'TrackHeaderVersion': 0,
'TrackID': 1,
'TrackLayer': 0},
},{
'name': 'wmv',
'ffmpeg': 1,
'parser': video.WMVParser,
'meta': {
'EncodingSettings': 'Lavf52.103.0',
},
'expected_meta': {},
}
]
def test_all_parametred(self):
    """Run the clean-and-verify round trip for every entry of ``self.data``.

    For each case the ``dirty.<name>`` sample is copied to ``clean.<name>``,
    the values listed in ``case['meta']`` are checked against the parser's
    metadata (one level of nested dicts is supported), the copy is cleaned,
    and the cleaned output must report exactly ``case['expected_meta']`` and
    be cleanable a second time.

    Every case runs inside its own ``subTest`` so that a failure (or a skip
    of an ffmpeg-dependent case) is reported for that case only and the
    remaining cases still run. Files created by a case are removed in a
    ``finally`` clause, so a failing assertion does not leave them behind
    in ``./tests/data``.
    """
    for case in self.data:
        with self.subTest(case=case['name']):
            if 'ffmpeg' in case:
                try:
                    video._get_ffmpeg_path()
                except RuntimeError:
                    # Skip only this case instead of the whole test method.
                    self.skipTest('video._get_ffmpeg_path() raised RuntimeError')

            print('[+] Testing %s' % case['name'])
            target = './tests/data/clean.' + case['name']
            shutil.copy('./tests/data/dirty.' + case['name'], target)
            leftovers = [target]

            try:
                p1 = case['parser'](target)
                leftovers.append(p1.output_filename)

                meta = p1.get_meta()
                for k, v in case['meta'].items():
                    if isinstance(v, dict):
                        # Nested metadata: compare each listed sub-key.
                        for _k, _v in v.items():
                            self.assertEqual(meta[k][_k], _v)
                    else:
                        self.assertEqual(meta[k], v)

                p1.lightweight_cleaning = True
                self.assertTrue(p1.remove_all())

                p2 = case['parser'](p1.output_filename)
                leftovers.append(p2.output_filename)
                self.assertEqual(p2.get_meta(), case['expected_meta'])
                self.assertTrue(p2.remove_all())
            finally:
                # Always tidy up, whether or not an assertion failed.
                for path in leftovers:
                    if os.path.exists(path):
                        os.remove(path)
meta = p.get_meta()
self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
ret = p.remove_all()
self.assertTrue(ret)
p = pdf.PDFParser('./tests/data/clean.cleaned.pdf')
expected_meta = {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1}
self.assertEqual(p.get_meta(), expected_meta)
self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.pdf')
os.remove('./tests/data/clean.cleaned.pdf')
os.remove('./tests/data/clean.cleaned.cleaned.pdf')
def test_png(self):
    """Clean a copy of ``dirty.png`` and check that no metadata is left."""
    source = './tests/data/clean.png'
    cleaned = './tests/data/clean.cleaned.png'
    shutil.copy('./tests/data/dirty.png', source)

    dirty_parser = images.PNGParser(source)
    self.assertEqual(dirty_parser.get_meta()['Comment'], 'This is a comment, be careful!')
    self.assertTrue(dirty_parser.remove_all())

    # The cleaned file must be metadata-free and cleanable once more.
    clean_parser = images.PNGParser(cleaned)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.png'):
        os.remove(leftover)
def test_jpg(self):
    """Clean a copy of ``dirty.jpg`` and check that no metadata is left."""
    source = './tests/data/clean.jpg'
    cleaned = './tests/data/clean.cleaned.jpg'
    shutil.copy('./tests/data/dirty.jpg', source)

    dirty_parser = images.JPGParser(source)
    self.assertEqual(dirty_parser.get_meta()['Comment'], 'Created with GIMP')
    self.assertTrue(dirty_parser.remove_all())

    # The cleaned file must be metadata-free and cleanable once more.
    clean_parser = images.JPGParser(cleaned)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.jpg'):
        os.remove(leftover)
def test_mp3(self):
    """Clean a copy of ``dirty.mp3`` and check that no metadata is left."""
    source = './tests/data/clean.mp3'
    cleaned = './tests/data/clean.cleaned.mp3'
    shutil.copy('./tests/data/dirty.mp3', source)

    dirty_parser = audio.MP3Parser(source)
    self.assertEqual(dirty_parser.get_meta()['TXXX:I am a'], 'various comment')
    self.assertTrue(dirty_parser.remove_all())

    # The cleaned file must be metadata-free and cleanable once more.
    clean_parser = audio.MP3Parser(cleaned)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.mp3'):
        os.remove(leftover)
def test_ogg(self):
    """Clean a copy of ``dirty.ogg`` and check that no metadata is left."""
    source = './tests/data/clean.ogg'
    cleaned = './tests/data/clean.cleaned.ogg'
    shutil.copy('./tests/data/dirty.ogg', source)

    dirty_parser = audio.OGGParser(source)
    self.assertEqual(dirty_parser.get_meta()['title'], 'I am so')
    self.assertTrue(dirty_parser.remove_all())

    # The cleaned file must be metadata-free and cleanable once more.
    clean_parser = audio.OGGParser(cleaned)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.ogg'):
        os.remove(leftover)
def test_flac(self):
    """Clean a copy of ``dirty.flac`` and check that no metadata is left."""
    source = './tests/data/clean.flac'
    cleaned = './tests/data/clean.cleaned.flac'
    shutil.copy('./tests/data/dirty.flac', source)

    dirty_parser = audio.FLACParser(source)
    self.assertEqual(dirty_parser.get_meta()['title'], 'I am so')
    self.assertTrue(dirty_parser.remove_all())

    # The cleaned file must be metadata-free and cleanable once more.
    clean_parser = audio.FLACParser(cleaned)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.flac'):
        os.remove(leftover)
def test_office(self):
    """Clean a copy of ``dirty.docx`` and check that no metadata is left."""
    source = './tests/data/clean.docx'
    cleaned = './tests/data/clean.cleaned.docx'
    shutil.copy('./tests/data/dirty.docx', source)

    dirty_parser = office.MSOfficeParser(source)
    # Only the presence of a metadata object is checked before cleaning.
    self.assertIsNotNone(dirty_parser.get_meta())
    self.assertTrue(dirty_parser.remove_all())

    clean_parser = office.MSOfficeParser(cleaned)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.docx'):
        os.remove(leftover)
def test_libreoffice(self):
    """Clean a copy of ``dirty.odt`` and check that no metadata is left."""
    source = './tests/data/clean.odt'
    cleaned = './tests/data/clean.cleaned.odt'
    shutil.copy('./tests/data/dirty.odt', source)

    dirty_parser = office.LibreOfficeParser(source)
    # Only the presence of a metadata object is checked before cleaning.
    self.assertIsNotNone(dirty_parser.get_meta())
    self.assertTrue(dirty_parser.remove_all())

    clean_parser = office.LibreOfficeParser(cleaned)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.odt'):
        os.remove(leftover)
def test_tiff(self):
    """Clean a copy of ``dirty.tiff`` and check that no metadata is left."""
    source = './tests/data/clean.tiff'
    cleaned = './tests/data/clean.cleaned.tiff'
    shutil.copy('./tests/data/dirty.tiff', source)

    dirty_parser = images.TiffParser(source)
    self.assertEqual(dirty_parser.get_meta()['Model'], 'C7070WZ')
    self.assertTrue(dirty_parser.remove_all())

    # The cleaned file must be metadata-free and cleanable once more.
    clean_parser = images.TiffParser(cleaned)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.tiff'):
        os.remove(leftover)
def test_bmp(self):
    """Run the cleaning round trip on ``dirty.bmp`` with the harmless parser."""
    source = './tests/data/clean.bmp'
    cleaned = './tests/data/clean.cleaned.bmp'
    shutil.copy('./tests/data/dirty.bmp', source)

    dirty_parser = harmless.HarmlessParser(source)
    # The sample is expected to expose no metadata even before cleaning.
    self.assertEqual(dirty_parser.get_meta(), {})
    self.assertTrue(dirty_parser.remove_all())

    clean_parser = harmless.HarmlessParser(cleaned)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.bmp'):
        os.remove(leftover)
def test_torrent(self):
    """Clean a copy of ``dirty.torrent`` and check that no metadata is left."""
    source = './tests/data/clean.torrent'
    cleaned = './tests/data/clean.cleaned.torrent'
    shutil.copy('./tests/data/dirty.torrent', source)

    dirty_parser = torrent.TorrentParser(source)
    # The whole metadata dict is compared here, not just a single key.
    self.assertEqual(dirty_parser.get_meta(), {'created by': b'mktorrent 1.0', 'creation date': 1522397702})
    self.assertTrue(dirty_parser.remove_all())

    clean_parser = torrent.TorrentParser(cleaned)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.torrent'):
        os.remove(leftover)
def test_odf(self):
    """Clean a copy of ``dirty.odf`` and check that no metadata is left."""
    source = './tests/data/clean.odf'
    cleaned = './tests/data/clean.cleaned.odf'
    shutil.copy('./tests/data/dirty.odf', source)

    dirty_parser = office.LibreOfficeParser(source)
    document_meta = dirty_parser.get_meta()['meta.xml']
    self.assertEqual(document_meta['meta:creation-date'], '2018-04-23T00:18:59.438231281')
    self.assertTrue(dirty_parser.remove_all())

    # The cleaned file must be metadata-free and cleanable once more.
    clean_parser = office.LibreOfficeParser(cleaned)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.odf'):
        os.remove(leftover)
def test_odg(self):
    """Clean a copy of ``dirty.odg`` and check that no metadata is left."""
    source = './tests/data/clean.odg'
    cleaned = './tests/data/clean.cleaned.odg'
    shutil.copy('./tests/data/dirty.odg', source)

    dirty_parser = office.LibreOfficeParser(source)
    document_meta = dirty_parser.get_meta()['meta.xml']
    self.assertEqual(document_meta['dc:date'], '2018-04-23T00:26:59.385838550')
    self.assertTrue(dirty_parser.remove_all())

    # The cleaned file must be metadata-free and cleanable once more.
    clean_parser = office.LibreOfficeParser(cleaned)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.odg'):
        os.remove(leftover)
def test_txt(self):
    """Run the cleaning round trip on ``dirty.txt`` with the harmless parser."""
    source = './tests/data/clean.txt'
    cleaned = './tests/data/clean.cleaned.txt'
    shutil.copy('./tests/data/dirty.txt', source)

    dirty_parser = harmless.HarmlessParser(source)
    # The sample is expected to expose no metadata even before cleaning.
    self.assertEqual(dirty_parser.get_meta(), {})
    self.assertTrue(dirty_parser.remove_all())

    clean_parser = harmless.HarmlessParser(cleaned)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.txt'):
        os.remove(leftover)
def test_avi(self):
    """Clean a copy of ``dirty.avi``; skipped when the ffmpeg lookup fails."""
    try:
        video._get_ffmpeg_path()
    except RuntimeError:
        raise unittest.SkipTest

    source = './tests/data/clean.avi'
    cleaned = './tests/data/clean.cleaned.avi'
    shutil.copy('./tests/data/dirty.avi', source)

    dirty_parser = video.AVIParser(source)
    self.assertEqual(dirty_parser.get_meta()['Software'], 'MEncoder SVN-r33148-4.0.1')
    self.assertTrue(dirty_parser.remove_all())

    # The cleaned file must be metadata-free and cleanable once more.
    clean_parser = video.AVIParser(cleaned)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.avi'):
        os.remove(leftover)
def test_zip(self):
    """Build a zip from three dirty samples, clean it and check the result."""
    archive_path = './tests/data/dirty.zip'
    cleaned_path = './tests/data/dirty.cleaned.zip'

    # Assemble the archive under test from existing fixtures.
    with zipfile.ZipFile(archive_path, 'w') as bundle:
        for member in ('./tests/data/dirty.flac',
                       './tests/data/dirty.docx',
                       './tests/data/dirty.jpg'):
            bundle.write(member)

    dirty_parser = archive.ZipParser(archive_path)
    docx_meta = dirty_parser.get_meta()['tests/data/dirty.docx']
    self.assertEqual(docx_meta['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
    self.assertTrue(dirty_parser.remove_all())

    clean_parser = archive.ZipParser(cleaned_path)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (archive_path, cleaned_path, './tests/data/dirty.cleaned.cleaned.zip'):
        os.remove(leftover)
def test_mp4(self):
    """Clean a copy of ``dirty.mp4``; skipped when the ffmpeg lookup fails."""
    try:
        video._get_ffmpeg_path()
    except RuntimeError:
        raise unittest.SkipTest

    source = './tests/data/clean.mp4'
    cleaned = './tests/data/clean.cleaned.mp4'
    shutil.copy('./tests/data/dirty.mp4', source)

    dirty_parser = video.MP4Parser(source)
    self.assertEqual(dirty_parser.get_meta()['Encoder'], 'HandBrake 0.9.4 2009112300')
    self.assertTrue(dirty_parser.remove_all())

    # Only the absence of the 'Encoder' key is required after cleaning.
    clean_parser = video.MP4Parser(cleaned)
    self.assertNotIn('Encoder', clean_parser.get_meta())
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.mp4'):
        os.remove(leftover)
def test_wmv(self):
    """Clean a copy of ``dirty.wmv``; skipped when the ffmpeg lookup fails."""
    try:
        video._get_ffmpeg_path()
    except RuntimeError:
        raise unittest.SkipTest

    source = './tests/data/clean.wmv'
    cleaned = './tests/data/clean.cleaned.wmv'
    shutil.copy('./tests/data/dirty.wmv', source)

    dirty_parser = video.WMVParser(source)
    self.assertEqual(dirty_parser.get_meta()['EncodingSettings'], 'Lavf52.103.0')
    self.assertTrue(dirty_parser.remove_all())

    # Only the absence of the 'EncodingSettings' key is required after cleaning.
    clean_parser = video.WMVParser(cleaned)
    self.assertNotIn('EncodingSettings', clean_parser.get_meta())
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.wmv'):
        os.remove(leftover)
def test_gif(self):
    """Clean a copy of ``dirty.gif`` and check that its comment is removed.

    The sample carries a ``Comment`` field before cleaning; after cleaning
    that same key must be gone, and the cleaned file must be cleanable a
    second time.
    """
    shutil.copy('./tests/data/dirty.gif', './tests/data/clean.gif')
    p = images.GIFParser('./tests/data/clean.gif')

    meta = p.get_meta()
    self.assertEqual(meta['Comment'], 'this is a test comment')

    ret = p.remove_all()
    self.assertTrue(ret)

    p = images.GIFParser('./tests/data/clean.cleaned.gif')
    # Check the key that was present before cleaning. The previous
    # 'EncodingSettings' check was copied from the WMV test and could
    # never fail for a GIF.
    self.assertNotIn('Comment', p.get_meta())
    self.assertTrue(p.remove_all())

    os.remove('./tests/data/clean.gif')
    os.remove('./tests/data/clean.cleaned.gif')
    os.remove('./tests/data/clean.cleaned.cleaned.gif')
def test_html(self): def test_html(self):
shutil.copy('./tests/data/dirty.html', './tests/data/clean.html') shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')
@ -688,7 +539,6 @@ class TestCleaning(unittest.TestCase):
os.remove('./tests/data/clean.html') os.remove('./tests/data/clean.html')
os.remove('./tests/data/clean.cleaned.html') os.remove('./tests/data/clean.cleaned.html')
def test_epub(self): def test_epub(self):
shutil.copy('./tests/data/dirty.epub', './tests/data/clean.epub') shutil.copy('./tests/data/dirty.epub', './tests/data/clean.epub')
p = epub.EPUBParser('./tests/data/clean.epub') p = epub.EPUBParser('./tests/data/clean.epub')
@ -711,25 +561,26 @@ class TestCleaning(unittest.TestCase):
os.remove('./tests/data/clean.cleaned.cleaned.epub') os.remove('./tests/data/clean.cleaned.cleaned.epub')
def test_css(self): class TestCleaningArchives(unittest.TestCase):
shutil.copy('./tests/data/dirty.css', './tests/data/clean.css') def test_zip(self):
p = web.CSSParser('./tests/data/clean.css') with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
zout.write('./tests/data/dirty.flac')
self.assertEqual(p.get_meta(), { zout.write('./tests/data/dirty.docx')
'harmful data': 'underline is cool', zout.write('./tests/data/dirty.jpg')
'version': '1.0', p = archive.ZipParser('./tests/data/dirty.zip')
'author': 'jvoisin'}) meta = p.get_meta()
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
ret = p.remove_all() ret = p.remove_all()
self.assertTrue(ret) self.assertTrue(ret)
p = web.CSSParser('./tests/data/clean.cleaned.css') p = archive.ZipParser('./tests/data/dirty.cleaned.zip')
self.assertEqual(p.get_meta(), {}) self.assertEqual(p.get_meta(), {})
self.assertTrue(p.remove_all()) self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.css') os.remove('./tests/data/dirty.zip')
os.remove('./tests/data/clean.cleaned.css') os.remove('./tests/data/dirty.cleaned.zip')
os.remove('./tests/data/clean.cleaned.cleaned.css') os.remove('./tests/data/dirty.cleaned.cleaned.zip')
def test_tar(self): def test_tar(self):
with tarfile.TarFile.open('./tests/data/dirty.tar', 'w') as zout: with tarfile.TarFile.open('./tests/data/dirty.tar', 'w') as zout:
@ -870,49 +721,3 @@ class TestCleaning(unittest.TestCase):
os.remove('./tests/data/dirty.tar.xz') os.remove('./tests/data/dirty.tar.xz')
os.remove('./tests/data/dirty.cleaned.tar.xz') os.remove('./tests/data/dirty.cleaned.tar.xz')
os.remove('./tests/data/dirty.cleaned.cleaned.tar.xz') os.remove('./tests/data/dirty.cleaned.cleaned.tar.xz')
def test_svg(self):
    """Clean a copy of ``dirty.svg``, then check the ``weird.svg`` namespace."""
    source = './tests/data/clean.svg'
    cleaned = './tests/data/clean.cleaned.svg'
    shutil.copy('./tests/data/dirty.svg', source)

    dirty_parser = images.SVGParser(source)
    found = dirty_parser.get_meta()
    self.assertEqual(found['WorkCreatorAgentTitle'], 'GNOME Design Team')
    self.assertEqual(found['WorkSubject'], ['mat2', 'logo', 'metadata'])
    self.assertEqual(found['ID'], 'svg11300')
    self.assertEqual(found['Output_extension'], 'org.inkscape.output.svg.inkscape')
    self.assertTrue(dirty_parser.remove_all())

    # The cleaned file must be metadata-free and cleanable once more.
    clean_parser = images.SVGParser(cleaned)
    self.assertEqual(clean_parser.get_meta(), {})
    self.assertTrue(clean_parser.remove_all())

    for leftover in (source, cleaned, './tests/data/clean.cleaned.cleaned.svg'):
        os.remove(leftover)

    # Separate sample, read only: its namespace value is reported as-is.
    weird_meta = images.SVGParser('./tests/data/weird.svg').get_meta()
    self.assertEqual(weird_meta['Xmlns'], 'http://www.w3.org/1337/svg')
def test_ppm(self):
    """Clean a copy of ``dirty.ppm`` and check that no metadata is left.

    The sample exposes a comment under the key ``'1'`` before cleaning;
    the cleaned file must report no metadata and be cleanable again.
    """
    shutil.copy('./tests/data/dirty.ppm', './tests/data/clean.ppm')
    p = images.PPMParser('./tests/data/clean.ppm')

    # The leftover debugging print of the metadata was dropped here.
    meta = p.get_meta()
    self.assertEqual(meta['1'], '# A metadata')

    ret = p.remove_all()
    self.assertTrue(ret)

    p = images.PPMParser('./tests/data/clean.cleaned.ppm')
    self.assertEqual(p.get_meta(), {})
    self.assertTrue(p.remove_all())

    os.remove('./tests/data/clean.ppm')
    os.remove('./tests/data/clean.cleaned.ppm')
    os.remove('./tests/data/clean.cleaned.cleaned.ppm')

View File

@ -1,106 +0,0 @@
#!/usr/bin/env python3
import unittest
import shutil
import os
from libmat2 import pdf, images, torrent
class TestLightWeightCleaning(unittest.TestCase):
    """Exercise ``remove_all()`` with ``lightweight_cleaning`` switched on.

    Every test copies a ``dirty.*`` sample to ``clean.*``, checks one known
    metadata value, cleans the copy in lightweight mode and inspects the
    metadata of the resulting ``clean.cleaned.*`` file.
    """

    def test_pdf(self):
        """Lightweight-clean a PDF and compare the metadata that remains."""
        source = './tests/data/clean.pdf'
        cleaned = './tests/data/clean.cleaned.pdf'
        shutil.copy('./tests/data/dirty.pdf', source)

        parser = pdf.PDFParser(source)
        self.assertEqual(parser.get_meta()['producer'], 'pdfTeX-1.40.14')

        parser.lightweight_cleaning = True
        self.assertTrue(parser.remove_all())

        remaining = pdf.PDFParser(cleaned).get_meta()
        self.assertEqual(remaining, {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1})

        os.remove(source)
        os.remove(cleaned)

    def test_png(self):
        """Lightweight-clean a PNG, then clean again over the existing output."""
        source = './tests/data/clean.png'
        cleaned = './tests/data/clean.cleaned.png'
        shutil.copy('./tests/data/dirty.png', source)

        parser = images.PNGParser(source)
        self.assertEqual(parser.get_meta()['Comment'], 'This is a comment, be careful!')

        parser.lightweight_cleaning = True
        self.assertTrue(parser.remove_all())
        self.assertEqual(images.PNGParser(cleaned).get_meta(), {})

        # Second run on the same input while the output file already exists.
        second_pass = images.PNGParser(source)
        second_pass.lightweight_cleaning = True
        self.assertTrue(second_pass.remove_all())

        os.remove(source)
        os.remove(cleaned)

    def test_jpg(self):
        """Lightweight-clean a JPEG and check that no metadata is left."""
        source = './tests/data/clean.jpg'
        cleaned = './tests/data/clean.cleaned.jpg'
        shutil.copy('./tests/data/dirty.jpg', source)

        parser = images.JPGParser(source)
        self.assertEqual(parser.get_meta()['Comment'], 'Created with GIMP')

        parser.lightweight_cleaning = True
        self.assertTrue(parser.remove_all())
        self.assertEqual(images.JPGParser(cleaned).get_meta(), {})

        os.remove(source)
        os.remove(cleaned)

    def test_torrent(self):
        """Lightweight-clean a torrent and check that no metadata is left."""
        source = './tests/data/clean.torrent'
        cleaned = './tests/data/clean.cleaned.torrent'
        shutil.copy('./tests/data/dirty.torrent', source)

        parser = torrent.TorrentParser(source)
        self.assertEqual(parser.get_meta()['created by'], b'mktorrent 1.0')

        parser.lightweight_cleaning = True
        self.assertTrue(parser.remove_all())
        self.assertEqual(torrent.TorrentParser(cleaned).get_meta(), {})

        os.remove(source)
        os.remove(cleaned)

    def test_tiff(self):
        """Lightweight-clean a TIFF and compare the metadata that remains."""
        source = './tests/data/clean.tiff'
        cleaned = './tests/data/clean.cleaned.tiff'
        shutil.copy('./tests/data/dirty.tiff', source)

        parser = images.TiffParser(source)
        self.assertEqual(parser.get_meta()['ImageDescription'], 'OLYMPUS DIGITAL CAMERA ')

        parser.lightweight_cleaning = True
        self.assertTrue(parser.remove_all())

        # These four fields are expected to survive the cleaning.
        surviving = {
            'Orientation': 'Horizontal (normal)',
            'ResolutionUnit': 'inches',
            'XResolution': 72,
            'YResolution': 72,
        }
        self.assertEqual(images.TiffParser(cleaned).get_meta(), surviving)

        os.remove(source)
        os.remove(cleaned)

View File

@ -0,0 +1,76 @@
#!/usr/bin/env python3
import unittest
import shutil
import os
from libmat2 import pdf, images, torrent
class TestLightWeightCleaning(unittest.TestCase):
    """Table-driven checks of ``remove_all()`` in lightweight mode.

    Each entry of ``data`` describes one sample in ``./tests/data``:

    * ``name``: file extension; the sample is ``dirty.<name>``.
    * ``parser``: parser class instantiated with the file path.
    * ``meta``: key/value pairs that must be present before cleaning.
    * ``expected_meta``: exact metadata expected from the cleaned file.
    """

    data = [{
        'name': 'pdf',
        'parser': pdf.PDFParser,
        'meta': {'producer': 'pdfTeX-1.40.14'},
        'expected_meta': {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1},
    }, {
        'name': 'png',
        'parser': images.PNGParser,
        'meta': {'Comment': 'This is a comment, be careful!'},
        'expected_meta': {},
    }, {
        'name': 'jpg',
        'parser': images.JPGParser,
        'meta': {'Comment': 'Created with GIMP'},
        'expected_meta': {},
    }, {
        'name': 'torrent',
        'parser': torrent.TorrentParser,
        'meta': {'created by': b'mktorrent 1.0'},
        'expected_meta': {},
    }, {
        'name': 'tiff',
        'parser': images.TiffParser,
        'meta': {'ImageDescription': 'OLYMPUS DIGITAL CAMERA '},
        'expected_meta': {
            'Orientation': 'Horizontal (normal)',
            'ResolutionUnit': 'inches',
            'XResolution': 72,
            'YResolution': 72,
        },
    }]

    @staticmethod
    def _remove_if_present(paths):
        """Delete every path in *paths* that currently exists."""
        for path in paths:
            if os.path.exists(path):
                os.remove(path)

    def test_all(self):
        """Clean every sample of ``data`` in lightweight mode and verify it.

        Each case runs in its own ``subTest`` so a failure is reported for
        that case only and the remaining cases still run. Files created by
        a case are removed even when one of its assertions fails.
        """
        for case in self.data:
            with self.subTest(case=case['name']):
                target = './tests/data/clean.' + case['name']
                shutil.copy('./tests/data/dirty.' + case['name'], target)
                leftovers = [target]

                try:
                    p1 = case['parser'](target)
                    leftovers.append(p1.output_filename)

                    meta = p1.get_meta()
                    for k, v in case['meta'].items():
                        self.assertEqual(meta[k], v)

                    p1.lightweight_cleaning = True
                    self.assertTrue(p1.remove_all())

                    p2 = case['parser'](p1.output_filename)
                    self.assertEqual(p2.get_meta(), case['expected_meta'])
                finally:
                    self._remove_if_present(leftovers)

    def test_exiftool_overwrite(self):
        """Clean a PNG while a file already sits at the output path.

        A dirty copy is placed at ``p1.output_filename`` before
        ``remove_all()`` is called; afterwards the file at that path must
        report no metadata, i.e. it was replaced by the cleaned output.
        """
        target = './tests/data/clean.png'
        shutil.copy('./tests/data/dirty.png', target)
        leftovers = [target]

        try:
            p1 = images.PNGParser(target)
            p1.lightweight_cleaning = True
            leftovers.append(p1.output_filename)

            # Pre-create the output file so the cleaning has to replace it.
            shutil.copy('./tests/data/dirty.png', p1.output_filename)
            self.assertTrue(p1.remove_all())

            p2 = images.PNGParser(p1.output_filename)
            self.assertEqual(p2.get_meta(), {})
        finally:
            self._remove_if_present(leftovers)

View File

@ -7,25 +7,26 @@ import os
from libmat2 import office, UnknownMemberPolicy from libmat2 import office, UnknownMemberPolicy
class TestPolicy(unittest.TestCase): class TestPolicy(unittest.TestCase):
target = './tests/data/clean.docx'
def test_policy_omit(self): def test_policy_omit(self):
shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx') shutil.copy('./tests/data/embedded.docx', self.target)
p = office.MSOfficeParser('./tests/data/clean.docx') p = office.MSOfficeParser(self.target)
p.unknown_member_policy = UnknownMemberPolicy.OMIT p.unknown_member_policy = UnknownMemberPolicy.OMIT
self.assertTrue(p.remove_all()) self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.docx') os.remove(p.filename)
os.remove('./tests/data/clean.cleaned.docx')
def test_policy_keep(self): def test_policy_keep(self):
shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx') shutil.copy('./tests/data/embedded.docx', self.target)
p = office.MSOfficeParser('./tests/data/clean.docx') p = office.MSOfficeParser(self.target)
p.unknown_member_policy = UnknownMemberPolicy.KEEP p.unknown_member_policy = UnknownMemberPolicy.KEEP
self.assertTrue(p.remove_all()) self.assertTrue(p.remove_all())
os.remove('./tests/data/clean.docx') os.remove(p.filename)
os.remove('./tests/data/clean.cleaned.docx') os.remove(p.output_filename)
def test_policy_unknown(self): def test_policy_unknown(self):
shutil.copy('./tests/data/embedded.docx', './tests/data/clean.docx') shutil.copy('./tests/data/embedded.docx', self.target)
p = office.MSOfficeParser('./tests/data/clean.docx') p = office.MSOfficeParser(self.target)
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
p.unknown_member_policy = UnknownMemberPolicy('unknown_policy_name_totally_invalid') p.unknown_member_policy = UnknownMemberPolicy('unknown_policy_name_totally_invalid')
os.remove('./tests/data/clean.docx') os.remove(p.filename)