From 1703ed6ebbe2552990fb847d5fd5dde1d0382a37 Mon Sep 17 00:00:00 2001 From: Denis 'GNUtoo' Carikli Date: Mon, 6 Dec 2021 17:45:47 +0100 Subject: [PATCH] zip archives: keep individual files compression type While hardcoding the compression to zipfile.ZIP_DEFLATED works for most use cases of mat, being able to produce cleaned up uncompressed zip files is useful for content that cannot be compressed more. In addition, it also enables using mat2 for reproducible builds of Android bootanimation files that don't support compression. Signed-off-by: Denis 'GNUtoo' Carikli --- libmat2/archive.py | 27 +++++++++++++++++++++++++-- tests/test_libmat2.py | 43 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 62 insertions(+), 8 deletions(-) diff --git a/libmat2/archive.py b/libmat2/archive.py index 48c1594..29db417 100644 --- a/libmat2/archive.py +++ b/libmat2/archive.py @@ -120,6 +120,18 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): # pylint: disable=unused-argument return member + @staticmethod + def _get_member_compression(member: ArchiveMember): + """Get the compression of the archive member.""" + # pylint: disable=unused-argument + return None + + @staticmethod + def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember: + """Set the compression of the archive member.""" + # pylint: disable=unused-argument + return member + def get_meta(self) -> Dict[str, Union[str, dict]]: meta = dict() # type: Dict[str, Union[str, dict]] @@ -184,6 +196,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): original_permissions = os.stat(full_path).st_mode os.chmod(full_path, original_permissions | stat.S_IWUSR | stat.S_IRUSR) + original_compression = self._get_member_compression(item) + if self._specific_cleanup(full_path) is False: logging.warning("Something went wrong during deep cleaning of %s", member_name) @@ -223,6 +237,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): zinfo = 
self.member_class(member_name) # type: ignore zinfo = self._set_member_permissions(zinfo, original_permissions) + zinfo = self._set_member_compression(zinfo, original_compression) clean_zinfo = self._clean_member(zinfo) self._add_file_to_archive(zout, clean_zinfo, full_path) @@ -368,7 +383,6 @@ class ZipParser(ArchiveBasedAbstractParser): super().__init__(filename) self.archive_class = zipfile.ZipFile self.member_class = zipfile.ZipInfo - self.zip_compression_type = zipfile.ZIP_DEFLATED def is_archive_valid(self): try: @@ -410,7 +424,7 @@ class ZipParser(ArchiveBasedAbstractParser): assert isinstance(member, zipfile.ZipInfo) # please mypy with open(full_path, 'rb') as f: archive.writestr(member, f.read(), - compress_type=self.zip_compression_type) + compress_type=member.compress_type) @staticmethod def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]: @@ -421,3 +435,12 @@ class ZipParser(ArchiveBasedAbstractParser): def _get_member_name(member: ArchiveMember) -> str: assert isinstance(member, zipfile.ZipInfo) # please mypy return member.filename + + @staticmethod + def _get_member_compression(member: ArchiveMember): + return member.compress_type + + @staticmethod + def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember: + member.compress_type = compression + return member diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py index af92db3..f8e62de 100644 --- a/tests/test_libmat2.py +++ b/tests/test_libmat2.py @@ -175,14 +175,30 @@ class TestGetMeta(unittest.TestCase): def test_zip(self): with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout: - zout.write('./tests/data/dirty.flac') - zout.write('./tests/data/dirty.docx') - zout.write('./tests/data/dirty.jpg') + zout.write('./tests/data/dirty.flac', + compress_type = zipfile.ZIP_STORED) + zout.write('./tests/data/dirty.docx', + compress_type = zipfile.ZIP_DEFLATED) + zout.write('./tests/data/dirty.jpg', + compress_type = zipfile.ZIP_BZIP2) + 
zout.write('./tests/data/dirty.txt', + compress_type = zipfile.ZIP_LZMA) p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip') self.assertEqual(mimetype, 'application/zip') meta = p.get_meta() self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !') self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') + + with zipfile.ZipFile('./tests/data/dirty.zip') as zipin: + members = { + 'tests/data/dirty.flac' : zipfile.ZIP_STORED, + 'tests/data/dirty.docx': zipfile.ZIP_DEFLATED, + 'tests/data/dirty.jpg' : zipfile.ZIP_BZIP2, + 'tests/data/dirty.txt' : zipfile.ZIP_LZMA, + } + for k, v in members.items(): + self.assertEqual(zipin.getinfo(k).compress_type, v) + os.remove('./tests/data/dirty.zip') def test_wmv(self): @@ -595,9 +611,14 @@ class TestCleaning(unittest.TestCase): class TestCleaningArchives(unittest.TestCase): def test_zip(self): with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout: - zout.write('./tests/data/dirty.flac') - zout.write('./tests/data/dirty.docx') - zout.write('./tests/data/dirty.jpg') + zout.write('./tests/data/dirty.flac', + compress_type = zipfile.ZIP_STORED) + zout.write('./tests/data/dirty.docx', + compress_type = zipfile.ZIP_DEFLATED) + zout.write('./tests/data/dirty.jpg', + compress_type = zipfile.ZIP_BZIP2) + zout.write('./tests/data/dirty.txt', + compress_type = zipfile.ZIP_LZMA) p = archive.ZipParser('./tests/data/dirty.zip') meta = p.get_meta() self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') @@ -609,6 +630,16 @@ class TestCleaningArchives(unittest.TestCase): self.assertEqual(p.get_meta(), {}) self.assertTrue(p.remove_all()) + with zipfile.ZipFile('./tests/data/dirty.zip') as zipin: + members = { + 'tests/data/dirty.flac' : zipfile.ZIP_STORED, + 'tests/data/dirty.docx': zipfile.ZIP_DEFLATED, + 'tests/data/dirty.jpg' : zipfile.ZIP_BZIP2, + 'tests/data/dirty.txt' 
: zipfile.ZIP_LZMA, + } + for k, v in members.items(): + self.assertEqual(zipin.getinfo(k).compress_type, v) + os.remove('./tests/data/dirty.zip') os.remove('./tests/data/dirty.cleaned.zip') os.remove('./tests/data/dirty.cleaned.cleaned.zip')