Improve the reliability of the office parser
This commit is contained in:
parent
846a261465
commit
5b38bd7ccd
@ -16,6 +16,13 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
||||
files_to_keep = set() # type: Set[str]
|
||||
files_to_omit = set() # type: Set[Pattern]
|
||||
|
||||
def __init__(self, filename):
    """Set up the parser and check up front that the file is a zip archive.

    Calls the parent initialiser with `filename`, then tries to open
    `self.filename` with `zipfile.ZipFile` so that a non-archive is
    rejected at construction time rather than during later processing.

    Raises:
        ValueError: if the file cannot be opened as a zip archive.
    """
    super().__init__(filename)
    try:  # better fail here than later
        # Open inside `with` so the probe's file handle is closed right
        # away instead of lingering until garbage collection.
        with zipfile.ZipFile(self.filename):
            pass
    except zipfile.BadZipFile:
        raise ValueError
|
||||
|
||||
def _clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
|
||||
zipinfo.create_system = 3 # Linux
|
||||
zipinfo.comment = b''
|
||||
|
@ -90,6 +90,11 @@ class TestCorruptedFiles(unittest.TestCase):
|
||||
|
||||
os.remove('./tests/data/clean.torrent')
|
||||
|
||||
def test_odg(self):
    """A PNG copied to an `.odg` name must be rejected with ValueError."""
    fake_odg = './tests/data/clean.odg'
    shutil.copy('./tests/data/dirty.png', fake_odg)
    try:
        with self.assertRaises(ValueError):
            office.LibreOfficeParser(fake_odg)
    finally:
        # Remove the copied fixture even when the assertion fails, so the
        # test does not leave a stray file in tests/data.
        os.remove(fake_odg)
|
||||
|
||||
class TestGetMeta(unittest.TestCase):
|
||||
def test_pdf(self):
|
||||
p = pdf.PDFParser('./tests/data/dirty.pdf')
|
||||
|
Loading…
Reference in New Issue
Block a user