1
0
Fork 0
mirror of synced 2025-07-04 04:17:29 +02:00

Add support for html files

This commit is contained in:
jvoisin 2019-02-08 00:26:47 +01:00
parent e1dd439fc8
commit 6cc034e81b
4 changed files with 140 additions and 2 deletions

View file

@ -6,7 +6,7 @@ import os
import zipfile
from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless
from libmat2 import check_dependencies, video, archive
from libmat2 import check_dependencies, video, archive, html
class TestCheckDependencies(unittest.TestCase):
@ -596,3 +596,21 @@ class TestCleaning(unittest.TestCase):
os.remove('./tests/data/clean.gif')
os.remove('./tests/data/clean.cleaned.gif')
os.remove('./tests/data/clean.cleaned.cleaned.gif')
def test_html(self):
    """Exercise metadata reading and removal on an HTML fixture.

    The test copies ``dirty.html`` to ``clean.html``, checks that the
    ``author`` metadata of the copy is ``'jvoisin'``, and asserts that
    ``remove_all()`` reports success. It then opens
    ``clean.cleaned.html``, checks that it exposes no metadata, and
    asserts that it can be cleaned a second time. The three working
    files are deleted at the end.
    """
    working_copy = './tests/data/clean.html'
    cleaned_copy = './tests/data/clean.cleaned.html'
    # Presumably written by the second remove_all() call below; it is
    # only ever removed here, never opened.
    recleaned_copy = './tests/data/clean.cleaned.cleaned.html'

    # The parser is pointed at a copy rather than at dirty.html itself.
    shutil.copy('./tests/data/dirty.html', working_copy)

    dirty_parser = html.HTMLParser(working_copy)
    dirty_meta = dirty_parser.get_meta()
    self.assertEqual(dirty_meta['author'], 'jvoisin')

    self.assertTrue(dirty_parser.remove_all())

    # The cleaned output must carry no metadata and must itself be
    # cleanable again.
    cleaned_parser = html.HTMLParser(cleaned_copy)
    self.assertEqual(cleaned_parser.get_meta(), {})
    self.assertTrue(cleaned_parser.remove_all())

    # Delete the working files in the same order as before.
    for leftover in (working_copy, cleaned_copy, recleaned_copy):
        os.remove(leftover)