Add a test for nsid cleaning
This commit is contained in:
parent
0170f0e37e
commit
fc924239fe
@ -140,6 +140,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
Copyright 2018 Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org>
|
Copyright 2018 Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org>
|
||||||
Copyright 2016 Marie Rose for MAT2's logo
|
Copyright 2016 Marie Rose for MAT2's logo
|
||||||
|
|
||||||
|
The `tests/data/dirty_with_nsid.docx` file is licensed under GPLv3,
|
||||||
|
and was borrowed from the Calibre project: https://calibre-ebook.com/downloads/demos/demo.docx
|
||||||
|
|
||||||
# Thanks
|
# Thanks
|
||||||
|
|
||||||
MAT2 wouldn't exist without:
|
MAT2 wouldn't exist without:
|
||||||
|
BIN
tests/data/dirty_with_nsid.docx
Normal file
BIN
tests/data/dirty_with_nsid.docx
Normal file
Binary file not shown.
@ -137,3 +137,34 @@ class TestRsidRemoval(unittest.TestCase):
|
|||||||
|
|
||||||
os.remove('./tests/data/clean.docx')
|
os.remove('./tests/data/clean.docx')
|
||||||
os.remove('./tests/data/clean.cleaned.docx')
|
os.remove('./tests/data/clean.cleaned.docx')
|
||||||
|
|
||||||
|
|
||||||
|
class TestNsidRemoval(unittest.TestCase):
    """Check that cleaning a .docx file leaves no `w:nsid` markers behind."""

    @staticmethod
    def _remove_if_present(path):
        """Delete `path`, tolerating the case where it was never created."""
        try:
            os.remove(path)
        except FileNotFoundError:
            # Deliberate: an earlier failure may have prevented the file
            # from being written, and cleanup must not mask that failure.
            pass

    def test_office(self):
        """Clean a copy of the nsid fixture and inspect the resulting archive.

        The fixture is copied to a scratch path, its metadata is read, the
        `w:rsid` occurrences in its XML members are counted, and after
        `remove_all()` every XML member of the cleaned archive must contain
        no `w:nsid` occurrence.
        """
        dirty_path = './tests/data/clean.docx'
        cleaned_path = './tests/data/clean.cleaned.docx'

        # Register the cleanups before creating anything, so the scratch
        # files are removed even when one of the assertions below fails.
        self.addCleanup(self._remove_if_present, cleaned_path)
        self.addCleanup(self._remove_if_present, dirty_path)

        shutil.copy('./tests/data/dirty_with_nsid.docx', dirty_path)
        p = office.MSOfficeParser(dirty_path)

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        # NOTE(review): this counts 'w:rsid' although the test targets nsid;
        # the expected total is tied to the fixture -- confirm it is intended.
        how_many_rsid = 0
        with zipfile.ZipFile(dirty_path) as zin:
            for item in zin.infolist():
                if not item.filename.endswith('.xml'):
                    continue
                num = zin.read(item).decode('utf-8').lower().count('w:rsid')
                how_many_rsid += num
        self.assertEqual(how_many_rsid, 1190)

        ret = p.remove_all()
        self.assertTrue(ret)

        with zipfile.ZipFile(cleaned_path) as zin:
            for item in zin.infolist():
                if not item.filename.endswith('.xml'):
                    continue
                num = zin.read(item).decode('utf-8').lower().count('w:nsid')
                self.assertEqual(num, 0)
|
||||||
|
Loading…
Reference in New Issue
Block a user