1
0
mirror of synced 2024-11-22 01:04:23 +01:00

Add a test for nsid cleaning

This commit is contained in:
jvoisin 2019-09-01 13:34:26 +02:00
parent 0170f0e37e
commit fc924239fe
3 changed files with 34 additions and 0 deletions

View File

@ -140,6 +140,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
Copyright 2018 Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org> Copyright 2018 Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org>
Copyright 2016 Marie Rose for MAT2's logo Copyright 2016 Marie Rose for MAT2's logo
The `tests/data/dirty_with_nsid.docx` file is licensed under GPLv3,
and was borrowed from the Calibre project: https://calibre-ebook.com/downloads/demos/demo.docx
# Thanks # Thanks
MAT2 wouldn't exist without: MAT2 wouldn't exist without:

Binary file not shown.

View File

@ -137,3 +137,34 @@ class TestRsidRemoval(unittest.TestCase):
os.remove('./tests/data/clean.docx') os.remove('./tests/data/clean.docx')
os.remove('./tests/data/clean.cleaned.docx') os.remove('./tests/data/clean.cleaned.docx')
class TestNsidRemoval(unittest.TestCase):
    """Check that cleaning a .docx file leaves no `w:nsid` in its XML members."""

    @staticmethod
    def _remove_if_exists(path):
        """Delete *path*, ignoring the case where it was never created."""
        try:
            os.remove(path)
        except FileNotFoundError:
            # Deliberate best-effort: an earlier failure may mean the file
            # was never written, and cleanup must not mask that failure.
            pass

    @staticmethod
    def _xml_occurrences(archive_path, needle):
        """Yield `(member name, count)` for every `.xml` member of a zip.

        The count is the number of occurrences of *needle* in the member's
        UTF-8 decoded, lower-cased content.
        """
        with zipfile.ZipFile(archive_path) as zin:
            for item in zin.infolist():
                if not item.filename.endswith('.xml'):
                    continue
                content = zin.read(item).decode('utf-8').lower()
                yield item.filename, content.count(needle)

    def test_office(self):
        """Clean a copy of the nsid fixture and assert no `w:nsid` remains."""
        working_copy = './tests/data/clean.docx'
        cleaned_copy = './tests/data/clean.cleaned.docx'

        shutil.copy('./tests/data/dirty_with_nsid.docx', working_copy)
        # Register cleanups up front so a failing assertion below does not
        # leave stale files behind in tests/data.
        self.addCleanup(self._remove_if_exists, working_copy)
        self.addCleanup(self._remove_if_exists, cleaned_copy)

        p = office.MSOfficeParser(working_copy)

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        # The dirty fixture is expected to contain exactly 1190
        # (case-insensitive) occurrences of 'w:rsid' across its XML members.
        how_many_rsid = sum(
            count for _, count in self._xml_occurrences(working_copy, 'w:rsid')
        )
        self.assertEqual(how_many_rsid, 1190)

        ret = p.remove_all()
        self.assertTrue(ret)

        # Every XML member of the cleaned file must be free of 'w:nsid'.
        for filename, num in self._xml_occurrences(cleaned_copy, 'w:nsid'):
            self.assertEqual(num, 0, msg=filename)