1
0
mirror of synced 2024-06-07 06:47:48 +02:00

Massively simplify how we're cleaning office files

This commit is contained in:
jvoisin 2018-06-27 21:48:46 +02:00
parent f44769df41
commit 177184ac67

View File

@ -47,28 +47,10 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
return metadata
def _clean_internal_file(self, item: zipfile.ZipInfo, temp_folder: str,
zin: zipfile.ZipFile, zout: zipfile.ZipFile) -> bool:
zin.extract(member=item, path=temp_folder)
full_path = os.path.join(temp_folder, item.filename)
tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore
if not tmp_parser:
zout.close()
os.remove(self.output_filename)
print("%s's format (%s) isn't supported" % (item.filename, mtype))
return False
tmp_parser.remove_all()
zinfo = zipfile.ZipInfo(item.filename) # type: ignore
clean_zinfo = self._clean_zipinfo(zinfo)
with open(tmp_parser.output_filename, 'rb') as f:
zout.writestr(clean_zinfo, f.read())
return True
def remove_all(self) -> bool:
zin = zipfile.ZipFile(self.filename, 'r')
zout = zipfile.ZipFile(self.output_filename, 'w')
with zipfile.ZipFile(self.filename) as zin,\
zipfile.ZipFile(self.output_filename, 'w') as zout:
temp_folder = tempfile.mkdtemp()
for item in zin.infolist():
@ -80,12 +62,23 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
continue
elif any(map(lambda r: r.search(item.filename), self.files_to_omit)):
continue
elif not self._clean_internal_file(item, temp_folder, zin, zout):
zin.extract(member=item, path=temp_folder)
full_path = os.path.join(temp_folder, item.filename)
tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore
if not tmp_parser:
shutil.rmtree(temp_folder)
os.remove(self.output_filename)
print("%s's format (%s) isn't supported" % (item.filename, mtype))
return False
tmp_parser.remove_all()
zinfo = zipfile.ZipInfo(item.filename) # type: ignore
clean_zinfo = self._clean_zipinfo(zinfo)
with open(tmp_parser.output_filename, 'rb') as f:
zout.writestr(clean_zinfo, f.read())
shutil.rmtree(temp_folder)
zout.close()
zin.close()
return True