Add tar archive support
This commit is contained in:
parent
20ed5eb7d6
commit
82cc822a1d
5 changed files with 274 additions and 69 deletions
|
@ -1,13 +1,15 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import unittest
|
||||
import time
|
||||
import shutil
|
||||
import os
|
||||
import logging
|
||||
import zipfile
|
||||
import tarfile
|
||||
|
||||
from libmat2 import pdf, images, audio, office, parser_factory, torrent
|
||||
from libmat2 import harmless, video, web
|
||||
from libmat2 import harmless, video, web, archive
|
||||
|
||||
# No need for logging messages: should something go wrong,
|
||||
# the testsuite _will_ fail.
|
||||
|
@ -278,7 +280,6 @@ class TestCorruptedFiles(unittest.TestCase):
|
|||
p.remove_all()
|
||||
os.remove('./tests/data/clean.html')
|
||||
|
||||
|
||||
def test_epub(self):
|
||||
with zipfile.ZipFile('./tests/data/clean.epub', 'w') as zout:
|
||||
zout.write('./tests/data/dirty.jpg', 'OEBPS/content.opf')
|
||||
|
@ -291,3 +292,27 @@ class TestCorruptedFiles(unittest.TestCase):
|
|||
self.assertFalse(p.remove_all())
|
||||
os.remove('./tests/data/clean.epub')
|
||||
|
||||
def test_tar(self):
    """Exercise the tar parser on an archive with a problematic member.

    Builds ``clean.tar`` from several fixture files, checks the detected
    mimetype and some nested metadata, and asserts that ``remove_all()``
    reports failure. Then checks that ``archive.TarParser`` raises
    ``ValueError`` when handed a PNG file stored under a ``.tar`` name.
    """
    tar_path = './tests/data/clean.tar'
    regular_members = (
        './tests/data/dirty.flac',
        './tests/data/dirty.docx',
        './tests/data/dirty.jpg',
        './tests/data/embedded_corrupted.docx',
    )

    with tarfile.TarFile(tar_path, 'w') as tar_out:
        for member_path in regular_members:
            tar_out.add(member_path)

        # Hand-built header with an explicit timestamp and owner ids.
        png_info = tarfile.TarInfo(name='./tests/data/dirty.png')
        png_info.mtime = time.time()
        png_info.uid = 1337
        png_info.gid = 1338
        # NOTE(review): png_info.size is never set here; addfile() copies
        # `size` bytes from the file object, so this member is presumably
        # stored empty -- confirm that this is intended.
        with open('./tests/data/dirty.png', 'rb') as png_file:
            tar_out.addfile(png_info, png_file)

    parser, mimetype = parser_factory.get_parser(tar_path)
    self.assertEqual(mimetype, 'application/x-tar')

    metadata = parser.get_meta()
    self.assertEqual(metadata['./tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
    self.assertEqual(metadata['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
    self.assertFalse(parser.remove_all())
    os.remove(tar_path)

    # A file that is not a tar archive must be rejected at construction.
    shutil.copy('./tests/data/dirty.png', tar_path)
    with self.assertRaises(ValueError):
        archive.TarParser(tar_path)
    os.remove(tar_path)
|
||||
|
|
|
@ -4,6 +4,8 @@ import unittest
|
|||
import shutil
|
||||
import os
|
||||
import re
|
||||
import tarfile
|
||||
import tempfile
|
||||
import zipfile
|
||||
|
||||
from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless
|
||||
|
@ -195,6 +197,19 @@ class TestGetMeta(unittest.TestCase):
|
|||
self.assertEqual(meta['version'], '1.0')
|
||||
self.assertEqual(meta['harmful data'], 'underline is cool')
|
||||
|
||||
def test_tar(self):
    """Check metadata extraction from a freshly built tar archive.

    Packs three fixture files into ``dirty.tar``, asserts the mimetype
    reported by ``parser_factory.get_parser`` and two metadata entries
    returned by ``get_meta()``, then deletes the archive.
    """
    tar_path = './tests/data/dirty.tar'
    members = (
        './tests/data/dirty.flac',
        './tests/data/dirty.docx',
        './tests/data/dirty.jpg',
    )

    with tarfile.TarFile(tar_path, 'w') as tar_out:
        for member_path in members:
            tar_out.add(member_path)

    parser, mimetype = parser_factory.get_parser(tar_path)
    self.assertEqual(mimetype, 'application/x-tar')

    # Metadata is keyed by member name; the docx entry is itself keyed
    # by the files it contains.
    metadata = parser.get_meta()
    self.assertEqual(metadata['./tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
    self.assertEqual(metadata['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')

    os.remove(tar_path)
|
||||
|
||||
|
||||
class TestRemovingThumbnails(unittest.TestCase):
|
||||
def test_odt(self):
|
||||
shutil.copy('./tests/data/revision.odt', './tests/data/clean.odt')
|
||||
|
@ -702,3 +717,38 @@ class TestCleaning(unittest.TestCase):
|
|||
os.remove('./tests/data/clean.css')
|
||||
os.remove('./tests/data/clean.cleaned.css')
|
||||
os.remove('./tests/data/clean.cleaned.cleaned.css')
|
||||
|
||||
def test_tar(self):
    """Check that cleaning a tar archive strips metadata from its members.

    Builds ``dirty.tar`` from three fixture files, cleans it with
    ``archive.TarParser.remove_all()``, then verifies that the cleaned
    archive reports no metadata, that it can itself be cleaned again,
    and that each of the three extracted members reports no metadata.
    """
    with tarfile.TarFile('./tests/data/dirty.tar', 'w') as tar_out:
        tar_out.add('./tests/data/dirty.flac')
        tar_out.add('./tests/data/dirty.docx')
        tar_out.add('./tests/data/dirty.jpg')

    p = archive.TarParser('./tests/data/dirty.tar')
    meta = p.get_meta()
    self.assertEqual(meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')

    ret = p.remove_all()
    self.assertTrue(ret)

    p = archive.TarParser('./tests/data/dirty.cleaned.tar')
    self.assertEqual(p.get_meta(), {})
    self.assertTrue(p.remove_all())

    # TemporaryDirectory removes the extraction directory on exit, even
    # when an assertion below fails; a bare mkdtemp() would leak it.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # The context manager closes the archive; no explicit close().
        with tarfile.open('./tests/data/dirty.cleaned.tar') as tar_in:
            tar_in.extractall(path=tmp_dir)

        number_of_files = 0
        for root, _, fnames in os.walk(tmp_dir):
            for fname in fnames:
                complete_path = os.path.join(root, fname)
                # Distinct name so the archive parser `p` is not shadowed.
                member_parser, _ = parser_factory.get_parser(complete_path)
                self.assertIsNotNone(member_parser)
                self.assertEqual(member_parser.get_meta(), {})
                number_of_files += 1
        self.assertEqual(number_of_files, 3)

    os.remove('./tests/data/dirty.tar')
    os.remove('./tests/data/dirty.cleaned.tar')
    os.remove('./tests/data/dirty.cleaned.cleaned.tar')
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue