diff --git a/libmat2/archive.py b/libmat2/archive.py index 48c1594..29db417 100644 --- a/libmat2/archive.py +++ b/libmat2/archive.py @@ -120,6 +120,18 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): # pylint: disable=unused-argument return member + @staticmethod + def _get_member_compression(member: ArchiveMember): + """Get the compression of the archive member.""" + # pylint: disable=unused-argument + return None + + @staticmethod + def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember: + """Set the compression of the archive member.""" + # pylint: disable=unused-argument + return member + def get_meta(self) -> Dict[str, Union[str, dict]]: meta = dict() # type: Dict[str, Union[str, dict]] @@ -184,6 +196,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): original_permissions = os.stat(full_path).st_mode os.chmod(full_path, original_permissions | stat.S_IWUSR | stat.S_IRUSR) + original_compression = self._get_member_compression(item) + if self._specific_cleanup(full_path) is False: logging.warning("Something went wrong during deep cleaning of %s", member_name) @@ -223,6 +237,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): zinfo = self.member_class(member_name) # type: ignore zinfo = self._set_member_permissions(zinfo, original_permissions) + zinfo = self._set_member_compression(zinfo, original_compression) clean_zinfo = self._clean_member(zinfo) self._add_file_to_archive(zout, clean_zinfo, full_path) @@ -368,7 +383,6 @@ class ZipParser(ArchiveBasedAbstractParser): super().__init__(filename) self.archive_class = zipfile.ZipFile self.member_class = zipfile.ZipInfo - self.zip_compression_type = zipfile.ZIP_DEFLATED def is_archive_valid(self): try: @@ -410,7 +424,7 @@ class ZipParser(ArchiveBasedAbstractParser): assert isinstance(member, zipfile.ZipInfo) # please mypy with open(full_path, 'rb') as f: archive.writestr(member, f.read(), - compress_type=self.zip_compression_type) + compress_type=member.compress_type) @staticmethod def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]: @@ -421,3 +435,12 @@ class ZipParser(ArchiveBasedAbstractParser): def _get_member_name(member: ArchiveMember) -> str: assert isinstance(member, zipfile.ZipInfo) # please mypy return member.filename + + @staticmethod + def _get_member_compression(member: ArchiveMember): + return member.compress_type + + @staticmethod + def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember: + member.compress_type = compression + return member diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py index af92db3..f8e62de 100644 --- a/tests/test_libmat2.py +++ b/tests/test_libmat2.py @@ -175,14 +175,30 @@ class TestGetMeta(unittest.TestCase): def test_zip(self): with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout: - zout.write('./tests/data/dirty.flac') - zout.write('./tests/data/dirty.docx') - zout.write('./tests/data/dirty.jpg') + zout.write('./tests/data/dirty.flac', + compress_type = zipfile.ZIP_STORED) + zout.write('./tests/data/dirty.docx', + compress_type = zipfile.ZIP_DEFLATED) + zout.write('./tests/data/dirty.jpg', + compress_type = zipfile.ZIP_BZIP2) + zout.write('./tests/data/dirty.txt', + compress_type = zipfile.ZIP_LZMA) p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip') self.assertEqual(mimetype, 'application/zip') meta = p.get_meta() self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !') self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') + + with zipfile.ZipFile('./tests/data/dirty.zip') as zipin: + members = { + 'tests/data/dirty.flac' : zipfile.ZIP_STORED, + 'tests/data/dirty.docx': zipfile.ZIP_DEFLATED, + 'tests/data/dirty.jpg' : zipfile.ZIP_BZIP2, + 'tests/data/dirty.txt' : zipfile.ZIP_LZMA, + } + for k, v in members.items(): + self.assertEqual(zipin.getinfo(k).compress_type, v) + os.remove('./tests/data/dirty.zip') def test_wmv(self): @@ -595,9 +611,14 @@ class TestCleaning(unittest.TestCase): class TestCleaningArchives(unittest.TestCase): def test_zip(self): with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout: - zout.write('./tests/data/dirty.flac') - zout.write('./tests/data/dirty.docx') - zout.write('./tests/data/dirty.jpg') + zout.write('./tests/data/dirty.flac', + compress_type = zipfile.ZIP_STORED) + zout.write('./tests/data/dirty.docx', + compress_type = zipfile.ZIP_DEFLATED) + zout.write('./tests/data/dirty.jpg', + compress_type = zipfile.ZIP_BZIP2) + zout.write('./tests/data/dirty.txt', + compress_type = zipfile.ZIP_LZMA) p = archive.ZipParser('./tests/data/dirty.zip') meta = p.get_meta() self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!') @@ -609,6 +630,16 @@ class TestCleaningArchives(unittest.TestCase): self.assertEqual(p.get_meta(), {}) self.assertTrue(p.remove_all()) + with zipfile.ZipFile('./tests/data/dirty.zip') as zipin: + members = { + 'tests/data/dirty.flac' : zipfile.ZIP_STORED, + 'tests/data/dirty.docx': zipfile.ZIP_DEFLATED, + 'tests/data/dirty.jpg' : zipfile.ZIP_BZIP2, + 'tests/data/dirty.txt' : zipfile.ZIP_LZMA, + } + for k, v in members.items(): + self.assertEqual(zipin.getinfo(k).compress_type, v) + os.remove('./tests/data/dirty.zip') os.remove('./tests/data/dirty.cleaned.zip') os.remove('./tests/data/dirty.cleaned.cleaned.zip')