1
0
Fork 0

zip archives: keep individual files compression type

While hardcoding the compression to zipfile.ZIP_DEFLATED works for
most use cases of mat, being able to produce cleaned up uncompressed
zip files is useful for content that cannot be compressed more.

In addition it also enables to use mat2 for reproducible builds of
Android bootanimation files file that don't support compression.

Signed-off-by: Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>
This commit is contained in:
Denis 'GNUtoo' Carikli 2021-12-06 17:45:47 +01:00
parent 541b3c83b2
commit 1703ed6ebb
No known key found for this signature in database
GPG Key ID: 5F5DFCC14177E263
2 changed files with 62 additions and 8 deletions

View File

@ -120,6 +120,18 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
# pylint: disable=unused-argument
return member
@staticmethod
def _get_member_compression(member: ArchiveMember):
"""Get the compression of the archive member."""
# pylint: disable=unused-argument
return None
@staticmethod
def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember:
"""Set the compression of the archive member."""
# pylint: disable=unused-argument
return member
def get_meta(self) -> Dict[str, Union[str, dict]]:
meta = dict() # type: Dict[str, Union[str, dict]]
@ -184,6 +196,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
original_permissions = os.stat(full_path).st_mode
os.chmod(full_path, original_permissions | stat.S_IWUSR | stat.S_IRUSR)
original_compression = self._get_member_compression(item)
if self._specific_cleanup(full_path) is False:
logging.warning("Something went wrong during deep cleaning of %s",
member_name)
@ -223,6 +237,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
zinfo = self.member_class(member_name) # type: ignore
zinfo = self._set_member_permissions(zinfo, original_permissions)
zinfo = self._set_member_compression(zinfo, original_compression)
clean_zinfo = self._clean_member(zinfo)
self._add_file_to_archive(zout, clean_zinfo, full_path)
@ -368,7 +383,6 @@ class ZipParser(ArchiveBasedAbstractParser):
super().__init__(filename)
self.archive_class = zipfile.ZipFile
self.member_class = zipfile.ZipInfo
self.zip_compression_type = zipfile.ZIP_DEFLATED
def is_archive_valid(self):
try:
@ -410,7 +424,7 @@ class ZipParser(ArchiveBasedAbstractParser):
assert isinstance(member, zipfile.ZipInfo) # please mypy
with open(full_path, 'rb') as f:
archive.writestr(member, f.read(),
compress_type=self.zip_compression_type)
compress_type=member.compress_type)
@staticmethod
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
@ -421,3 +435,12 @@ class ZipParser(ArchiveBasedAbstractParser):
def _get_member_name(member: ArchiveMember) -> str:
assert isinstance(member, zipfile.ZipInfo) # please mypy
return member.filename
@staticmethod
def _get_member_compression(member: ArchiveMember):
return member.compress_type
@staticmethod
def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember:
member.compress_type = compression
return member

View File

@ -175,14 +175,30 @@ class TestGetMeta(unittest.TestCase):
def test_zip(self):
with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
zout.write('./tests/data/dirty.flac')
zout.write('./tests/data/dirty.docx')
zout.write('./tests/data/dirty.jpg')
zout.write('./tests/data/dirty.flac',
compress_type = zipfile.ZIP_STORED)
zout.write('./tests/data/dirty.docx',
compress_type = zipfile.ZIP_DEFLATED)
zout.write('./tests/data/dirty.jpg',
compress_type = zipfile.ZIP_BZIP2)
zout.write('./tests/data/dirty.txt',
compress_type = zipfile.ZIP_LZMA)
p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip')
self.assertEqual(mimetype, 'application/zip')
meta = p.get_meta()
self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
with zipfile.ZipFile('./tests/data/dirty.zip') as zipin:
members = {
'tests/data/dirty.flac' : zipfile.ZIP_STORED,
'tests/data/dirty.docx': zipfile.ZIP_DEFLATED,
'tests/data/dirty.jpg' : zipfile.ZIP_BZIP2,
'tests/data/dirty.txt' : zipfile.ZIP_LZMA,
}
for k, v in members.items():
self.assertEqual(zipin.getinfo(k).compress_type, v)
os.remove('./tests/data/dirty.zip')
def test_wmv(self):
@ -595,9 +611,14 @@ class TestCleaning(unittest.TestCase):
class TestCleaningArchives(unittest.TestCase):
def test_zip(self):
with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
zout.write('./tests/data/dirty.flac')
zout.write('./tests/data/dirty.docx')
zout.write('./tests/data/dirty.jpg')
zout.write('./tests/data/dirty.flac',
compress_type = zipfile.ZIP_STORED)
zout.write('./tests/data/dirty.docx',
compress_type = zipfile.ZIP_DEFLATED)
zout.write('./tests/data/dirty.jpg',
compress_type = zipfile.ZIP_BZIP2)
zout.write('./tests/data/dirty.txt',
compress_type = zipfile.ZIP_LZMA)
p = archive.ZipParser('./tests/data/dirty.zip')
meta = p.get_meta()
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
@ -609,6 +630,16 @@ class TestCleaningArchives(unittest.TestCase):
self.assertEqual(p.get_meta(), {})
self.assertTrue(p.remove_all())
with zipfile.ZipFile('./tests/data/dirty.zip') as zipin:
members = {
'tests/data/dirty.flac' : zipfile.ZIP_STORED,
'tests/data/dirty.docx': zipfile.ZIP_DEFLATED,
'tests/data/dirty.jpg' : zipfile.ZIP_BZIP2,
'tests/data/dirty.txt' : zipfile.ZIP_LZMA,
}
for k, v in members.items():
self.assertEqual(zipin.getinfo(k).compress_type, v)
os.remove('./tests/data/dirty.zip')
os.remove('./tests/data/dirty.cleaned.zip')
os.remove('./tests/data/dirty.cleaned.cleaned.zip')