Implement get_meta() for archives
This commit is contained in:
parent
5a9dc388ad
commit
513d897ea0
@ -67,6 +67,31 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
|
|
||||||
return metadata
|
return metadata
|
||||||
|
|
||||||
|
def get_meta(self) -> Dict[str, Union[str, dict]]:
    """Collect the metadata of every file stored in the archive.

    Each non-directory member of the zip file at `self.filename` is
    extracted to a temporary directory and handed to
    `parser_factory.get_parser`. Members for which no parser is returned,
    or whose parser reports no metadata, are left out of the result.

    :return: a dict mapping the member's name inside the archive to the
        metadata returned by that member's parser.
    """
    meta = {}  # type: Dict[str, Union[str, dict]]

    # The temporary directory is a context manager so that the extracted
    # files are removed even if extraction or a sub-parser raises.
    with zipfile.ZipFile(self.filename) as zin, \
            tempfile.TemporaryDirectory() as temp_folder:
        for item in zin.infolist():
            if item.filename.endswith('/'):  # pragma: no cover
                # `is_dir` is added in Python3.6
                continue  # don't keep empty folders

            # `extract` sanitises the member name (absolute paths, `..`)
            # and returns the path it really wrote to; rebuilding it by
            # hand from `item.filename` could point somewhere else.
            full_path = zin.extract(member=item, path=temp_folder)

            tmp_parser, _ = parser_factory.get_parser(full_path)  # type: ignore
            if not tmp_parser:
                continue

            local_meta = tmp_parser.get_meta()
            if local_meta:
                meta[item.filename] = local_meta

    return meta
|
||||||
|
|
||||||
def remove_all(self) -> bool:
|
def remove_all(self) -> bool:
|
||||||
# pylint: disable=too-many-branches
|
# pylint: disable=too-many-branches
|
||||||
|
|
||||||
|
@ -301,7 +301,7 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
|
|||||||
Yes, I know that parsing xml with regexp ain't pretty,
|
Yes, I know that parsing xml with regexp ain't pretty,
|
||||||
be my guest and fix it if you want.
|
be my guest and fix it if you want.
|
||||||
"""
|
"""
|
||||||
metadata = {}
|
metadata = super().get_meta()
|
||||||
zipin = zipfile.ZipFile(self.filename)
|
zipin = zipfile.ZipFile(self.filename)
|
||||||
for item in zipin.infolist():
|
for item in zipin.infolist():
|
||||||
if item.filename.startswith('docProps/') and item.filename.endswith('.xml'):
|
if item.filename.startswith('docProps/') and item.filename.endswith('.xml'):
|
||||||
|
@ -36,6 +36,7 @@ class TestZipMetadata(unittest.TestCase):
|
|||||||
|
|
||||||
meta = p.get_meta()
|
meta = p.get_meta()
|
||||||
self.assertIsNotNone(meta)
|
self.assertIsNotNone(meta)
|
||||||
|
self.assertEqual(meta['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
|
||||||
|
|
||||||
ret = p.remove_all()
|
ret = p.remove_all()
|
||||||
self.assertTrue(ret)
|
self.assertTrue(ret)
|
||||||
|
Loading…
Reference in New Issue
Block a user