Improve the reliability of the office parser
This commit is contained in:
parent
846a261465
commit
5b38bd7ccd
@ -16,6 +16,13 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
files_to_keep = set() # type: Set[str]
|
files_to_keep = set() # type: Set[str]
|
||||||
files_to_omit = set() # type: Set[Pattern]
|
files_to_omit = set() # type: Set[Pattern]
|
||||||
|
|
||||||
|
def __init__(self, filename):
    """Initialise the parser and check that the file is a readable zip archive.

    :param filename: path of the archive to parse; forwarded unchanged to
        the parent class constructor.
    :raises ValueError: if the file cannot be opened as a zip archive.
    """
    super().__init__(filename)
    # NOTE(review): self.filename is presumably set by the parent
    # constructor -- confirm against abstract.AbstractParser.
    try:  # better fail here than later
        # Open the archive only to validate it; the context manager closes
        # the underlying file handle right away instead of leaking it.
        with zipfile.ZipFile(self.filename):
            pass
    except zipfile.BadZipFile as e:
        # Keep raising ValueError for callers, but chain the original error
        # so the real cause stays visible in tracebacks.
        raise ValueError from e
|
||||||
|
|
||||||
def _clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
|
def _clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
|
||||||
zipinfo.create_system = 3 # Linux
|
zipinfo.create_system = 3 # Linux
|
||||||
zipinfo.comment = b''
|
zipinfo.comment = b''
|
||||||
|
@ -90,6 +90,11 @@ class TestCorruptedFiles(unittest.TestCase):
|
|||||||
|
|
||||||
os.remove('./tests/data/clean.torrent')
|
os.remove('./tests/data/clean.torrent')
|
||||||
|
|
||||||
|
def test_odg(self):
    """A PNG file renamed to .odg must be rejected with ValueError."""
    shutil.copy('./tests/data/dirty.png', './tests/data/clean.odg')
    # Remove the copied fixture even if the assertion below fails, so the
    # test data directory is left clean for later runs.
    self.addCleanup(os.remove, './tests/data/clean.odg')
    with self.assertRaises(ValueError):
        office.LibreOfficeParser('./tests/data/clean.odg')
|
||||||
|
|
||||||
class TestGetMeta(unittest.TestCase):
|
class TestGetMeta(unittest.TestCase):
|
||||||
def test_pdf(self):
|
def test_pdf(self):
|
||||||
p = pdf.PDFParser('./tests/data/dirty.pdf')
|
p = pdf.PDFParser('./tests/data/dirty.pdf')
|
||||||
|
Loading…
Reference in New Issue
Block a user