1
0
Fork 0

zip archives: keep individual files compression type

While hardcoding the compression to zipfile.ZIP_DEFLATED works for
most use cases of mat, being able to produce cleaned up uncompressed
zip files is useful for content that cannot be compressed more.

In addition, it also makes it possible to use mat2 for reproducible builds of
Android bootanimation files, which don't support compression.

Signed-off-by: Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>
This commit is contained in:
Denis 'GNUtoo' Carikli 2021-12-06 17:45:47 +01:00
parent 541b3c83b2
commit 1703ed6ebb
No known key found for this signature in database
GPG Key ID: 5F5DFCC14177E263
2 changed files with 62 additions and 8 deletions

View File

@ -120,6 +120,18 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# pylint: disable=unused-argument # pylint: disable=unused-argument
return member return member
@staticmethod
def _get_member_compression(member: ArchiveMember):
"""Get the compression of the archive member.

This base implementation ignores `member` and always returns None.
ZipParser overrides it to return the member's `compress_type`.
"""
# pylint: disable=unused-argument
return None
@staticmethod
def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember:
"""Set the compression of the archive member.

This base implementation ignores `compression` and returns `member`
unchanged. ZipParser overrides it to store the value in the member's
`compress_type`.
"""
# pylint: disable=unused-argument
return member
def get_meta(self) -> Dict[str, Union[str, dict]]: def get_meta(self) -> Dict[str, Union[str, dict]]:
meta = dict() # type: Dict[str, Union[str, dict]] meta = dict() # type: Dict[str, Union[str, dict]]
@ -184,6 +196,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
original_permissions = os.stat(full_path).st_mode original_permissions = os.stat(full_path).st_mode
os.chmod(full_path, original_permissions | stat.S_IWUSR | stat.S_IRUSR) os.chmod(full_path, original_permissions | stat.S_IWUSR | stat.S_IRUSR)
original_compression = self._get_member_compression(item)
if self._specific_cleanup(full_path) is False: if self._specific_cleanup(full_path) is False:
logging.warning("Something went wrong during deep cleaning of %s", logging.warning("Something went wrong during deep cleaning of %s",
member_name) member_name)
@ -223,6 +237,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
zinfo = self.member_class(member_name) # type: ignore zinfo = self.member_class(member_name) # type: ignore
zinfo = self._set_member_permissions(zinfo, original_permissions) zinfo = self._set_member_permissions(zinfo, original_permissions)
zinfo = self._set_member_compression(zinfo, original_compression)
clean_zinfo = self._clean_member(zinfo) clean_zinfo = self._clean_member(zinfo)
self._add_file_to_archive(zout, clean_zinfo, full_path) self._add_file_to_archive(zout, clean_zinfo, full_path)
@ -368,7 +383,6 @@ class ZipParser(ArchiveBasedAbstractParser):
super().__init__(filename) super().__init__(filename)
self.archive_class = zipfile.ZipFile self.archive_class = zipfile.ZipFile
self.member_class = zipfile.ZipInfo self.member_class = zipfile.ZipInfo
self.zip_compression_type = zipfile.ZIP_DEFLATED
def is_archive_valid(self): def is_archive_valid(self):
try: try:
@ -410,7 +424,7 @@ class ZipParser(ArchiveBasedAbstractParser):
assert isinstance(member, zipfile.ZipInfo) # please mypy assert isinstance(member, zipfile.ZipInfo) # please mypy
with open(full_path, 'rb') as f: with open(full_path, 'rb') as f:
archive.writestr(member, f.read(), archive.writestr(member, f.read(),
compress_type=self.zip_compression_type) compress_type=member.compress_type)
@staticmethod @staticmethod
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]: def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
@ -421,3 +435,12 @@ class ZipParser(ArchiveBasedAbstractParser):
def _get_member_name(member: ArchiveMember) -> str: def _get_member_name(member: ArchiveMember) -> str:
assert isinstance(member, zipfile.ZipInfo) # please mypy assert isinstance(member, zipfile.ZipInfo) # please mypy
return member.filename return member.filename
@staticmethod
def _get_member_compression(member: ArchiveMember):
    """Return the compression method recorded for a zip member.

    :param member: the archive member to inspect; must be a
        `zipfile.ZipInfo`.
    :return: the member's `compress_type` value.
    """
    # Narrow the ArchiveMember type the same way the sibling ZipParser
    # helpers do, so that the attribute access below type-checks.
    assert isinstance(member, zipfile.ZipInfo)  # please mypy
    return member.compress_type
@staticmethod
def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember:
    """Record the compression method to use for a zip member.

    :param member: the archive member to update; must be a
        `zipfile.ZipInfo`.
    :param compression: the value to store in the member's
        `compress_type`.
    :return: the same `member` object, updated in place.
    """
    # Narrow the ArchiveMember type the same way the sibling ZipParser
    # helpers do, so that the attribute assignment below type-checks.
    assert isinstance(member, zipfile.ZipInfo)  # please mypy
    member.compress_type = compression
    return member

View File

@ -175,14 +175,30 @@ class TestGetMeta(unittest.TestCase):
def test_zip(self): def test_zip(self):
with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout: with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
zout.write('./tests/data/dirty.flac') zout.write('./tests/data/dirty.flac',
zout.write('./tests/data/dirty.docx') compress_type = zipfile.ZIP_STORED)
zout.write('./tests/data/dirty.jpg') zout.write('./tests/data/dirty.docx',
compress_type = zipfile.ZIP_DEFLATED)
zout.write('./tests/data/dirty.jpg',
compress_type = zipfile.ZIP_BZIP2)
zout.write('./tests/data/dirty.txt',
compress_type = zipfile.ZIP_LZMA)
p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip') p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip')
self.assertEqual(mimetype, 'application/zip') self.assertEqual(mimetype, 'application/zip')
meta = p.get_meta() meta = p.get_meta()
self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !') self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
with zipfile.ZipFile('./tests/data/dirty.zip') as zipin:
members = {
'tests/data/dirty.flac' : zipfile.ZIP_STORED,
'tests/data/dirty.docx': zipfile.ZIP_DEFLATED,
'tests/data/dirty.jpg' : zipfile.ZIP_BZIP2,
'tests/data/dirty.txt' : zipfile.ZIP_LZMA,
}
for k, v in members.items():
self.assertEqual(zipin.getinfo(k).compress_type, v)
os.remove('./tests/data/dirty.zip') os.remove('./tests/data/dirty.zip')
def test_wmv(self): def test_wmv(self):
@ -595,9 +611,14 @@ class TestCleaning(unittest.TestCase):
class TestCleaningArchives(unittest.TestCase): class TestCleaningArchives(unittest.TestCase):
def test_zip(self): def test_zip(self):
with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout: with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
zout.write('./tests/data/dirty.flac') zout.write('./tests/data/dirty.flac',
zout.write('./tests/data/dirty.docx') compress_type = zipfile.ZIP_STORED)
zout.write('./tests/data/dirty.jpg') zout.write('./tests/data/dirty.docx',
compress_type = zipfile.ZIP_DEFLATED)
zout.write('./tests/data/dirty.jpg',
compress_type = zipfile.ZIP_BZIP2)
zout.write('./tests/data/dirty.txt',
compress_type = zipfile.ZIP_LZMA)
p = archive.ZipParser('./tests/data/dirty.zip') p = archive.ZipParser('./tests/data/dirty.zip')
meta = p.get_meta() meta = p.get_meta()
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
@ -609,6 +630,16 @@ class TestCleaningArchives(unittest.TestCase):
self.assertEqual(p.get_meta(), {}) self.assertEqual(p.get_meta(), {})
self.assertTrue(p.remove_all()) self.assertTrue(p.remove_all())
with zipfile.ZipFile('./tests/data/dirty.zip') as zipin:
members = {
'tests/data/dirty.flac' : zipfile.ZIP_STORED,
'tests/data/dirty.docx': zipfile.ZIP_DEFLATED,
'tests/data/dirty.jpg' : zipfile.ZIP_BZIP2,
'tests/data/dirty.txt' : zipfile.ZIP_LZMA,
}
for k, v in members.items():
self.assertEqual(zipin.getinfo(k).compress_type, v)
os.remove('./tests/data/dirty.zip') os.remove('./tests/data/dirty.zip')
os.remove('./tests/data/dirty.cleaned.zip') os.remove('./tests/data/dirty.cleaned.zip')
os.remove('./tests/data/dirty.cleaned.cleaned.zip') os.remove('./tests/data/dirty.cleaned.cleaned.zip')