zip archives: keep individual files' compression type
While hardcoding the compression to zipfile.ZIP_DEFLATED works for most use cases of mat2, being able to produce cleaned-up, uncompressed zip files is useful for content that cannot be compressed any further. In addition, it also makes it possible to use mat2 for reproducible builds of Android bootanimation files, which don't support compression. Signed-off-by: Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>
This commit is contained in:
parent
541b3c83b2
commit
1703ed6ebb
@ -120,6 +120,18 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
# pylint: disable=unused-argument
|
# pylint: disable=unused-argument
|
||||||
return member
|
return member
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_member_compression(member: ArchiveMember):
|
||||||
|
"""Get the compression of the archive member."""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember:
|
||||||
|
"""Set the compression of the archive member."""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
return member
|
||||||
|
|
||||||
def get_meta(self) -> Dict[str, Union[str, dict]]:
|
def get_meta(self) -> Dict[str, Union[str, dict]]:
|
||||||
meta = dict() # type: Dict[str, Union[str, dict]]
|
meta = dict() # type: Dict[str, Union[str, dict]]
|
||||||
|
|
||||||
@ -184,6 +196,8 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
original_permissions = os.stat(full_path).st_mode
|
original_permissions = os.stat(full_path).st_mode
|
||||||
os.chmod(full_path, original_permissions | stat.S_IWUSR | stat.S_IRUSR)
|
os.chmod(full_path, original_permissions | stat.S_IWUSR | stat.S_IRUSR)
|
||||||
|
|
||||||
|
original_compression = self._get_member_compression(item)
|
||||||
|
|
||||||
if self._specific_cleanup(full_path) is False:
|
if self._specific_cleanup(full_path) is False:
|
||||||
logging.warning("Something went wrong during deep cleaning of %s",
|
logging.warning("Something went wrong during deep cleaning of %s",
|
||||||
member_name)
|
member_name)
|
||||||
@ -223,6 +237,7 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
|
|
||||||
zinfo = self.member_class(member_name) # type: ignore
|
zinfo = self.member_class(member_name) # type: ignore
|
||||||
zinfo = self._set_member_permissions(zinfo, original_permissions)
|
zinfo = self._set_member_permissions(zinfo, original_permissions)
|
||||||
|
zinfo = self._set_member_compression(zinfo, original_compression)
|
||||||
clean_zinfo = self._clean_member(zinfo)
|
clean_zinfo = self._clean_member(zinfo)
|
||||||
self._add_file_to_archive(zout, clean_zinfo, full_path)
|
self._add_file_to_archive(zout, clean_zinfo, full_path)
|
||||||
|
|
||||||
@ -368,7 +383,6 @@ class ZipParser(ArchiveBasedAbstractParser):
|
|||||||
super().__init__(filename)
|
super().__init__(filename)
|
||||||
self.archive_class = zipfile.ZipFile
|
self.archive_class = zipfile.ZipFile
|
||||||
self.member_class = zipfile.ZipInfo
|
self.member_class = zipfile.ZipInfo
|
||||||
self.zip_compression_type = zipfile.ZIP_DEFLATED
|
|
||||||
|
|
||||||
def is_archive_valid(self):
|
def is_archive_valid(self):
|
||||||
try:
|
try:
|
||||||
@ -410,7 +424,7 @@ class ZipParser(ArchiveBasedAbstractParser):
|
|||||||
assert isinstance(member, zipfile.ZipInfo) # please mypy
|
assert isinstance(member, zipfile.ZipInfo) # please mypy
|
||||||
with open(full_path, 'rb') as f:
|
with open(full_path, 'rb') as f:
|
||||||
archive.writestr(member, f.read(),
|
archive.writestr(member, f.read(),
|
||||||
compress_type=self.zip_compression_type)
|
compress_type=member.compress_type)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
|
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
|
||||||
@ -421,3 +435,12 @@ class ZipParser(ArchiveBasedAbstractParser):
|
|||||||
def _get_member_name(member: ArchiveMember) -> str:
|
def _get_member_name(member: ArchiveMember) -> str:
|
||||||
assert isinstance(member, zipfile.ZipInfo) # please mypy
|
assert isinstance(member, zipfile.ZipInfo) # please mypy
|
||||||
return member.filename
|
return member.filename
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_member_compression(member: ArchiveMember):
|
||||||
|
return member.compress_type
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember:
|
||||||
|
member.compress_type = compression
|
||||||
|
return member
|
||||||
|
@ -175,14 +175,30 @@ class TestGetMeta(unittest.TestCase):
|
|||||||
|
|
||||||
def test_zip(self):
|
def test_zip(self):
|
||||||
with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
|
with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
|
||||||
zout.write('./tests/data/dirty.flac')
|
zout.write('./tests/data/dirty.flac',
|
||||||
zout.write('./tests/data/dirty.docx')
|
compress_type = zipfile.ZIP_STORED)
|
||||||
zout.write('./tests/data/dirty.jpg')
|
zout.write('./tests/data/dirty.docx',
|
||||||
|
compress_type = zipfile.ZIP_DEFLATED)
|
||||||
|
zout.write('./tests/data/dirty.jpg',
|
||||||
|
compress_type = zipfile.ZIP_BZIP2)
|
||||||
|
zout.write('./tests/data/dirty.txt',
|
||||||
|
compress_type = zipfile.ZIP_LZMA)
|
||||||
p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip')
|
p, mimetype = parser_factory.get_parser('./tests/data/dirty.zip')
|
||||||
self.assertEqual(mimetype, 'application/zip')
|
self.assertEqual(mimetype, 'application/zip')
|
||||||
meta = p.get_meta()
|
meta = p.get_meta()
|
||||||
self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
|
self.assertEqual(meta['tests/data/dirty.flac']['comments'], 'Thank you for using MAT !')
|
||||||
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
|
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
|
||||||
|
|
||||||
|
with zipfile.ZipFile('./tests/data/dirty.zip') as zipin:
|
||||||
|
members = {
|
||||||
|
'tests/data/dirty.flac' : zipfile.ZIP_STORED,
|
||||||
|
'tests/data/dirty.docx': zipfile.ZIP_DEFLATED,
|
||||||
|
'tests/data/dirty.jpg' : zipfile.ZIP_BZIP2,
|
||||||
|
'tests/data/dirty.txt' : zipfile.ZIP_LZMA,
|
||||||
|
}
|
||||||
|
for k, v in members.items():
|
||||||
|
self.assertEqual(zipin.getinfo(k).compress_type, v)
|
||||||
|
|
||||||
os.remove('./tests/data/dirty.zip')
|
os.remove('./tests/data/dirty.zip')
|
||||||
|
|
||||||
def test_wmv(self):
|
def test_wmv(self):
|
||||||
@ -595,9 +611,14 @@ class TestCleaning(unittest.TestCase):
|
|||||||
class TestCleaningArchives(unittest.TestCase):
|
class TestCleaningArchives(unittest.TestCase):
|
||||||
def test_zip(self):
|
def test_zip(self):
|
||||||
with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
|
with zipfile.ZipFile('./tests/data/dirty.zip', 'w') as zout:
|
||||||
zout.write('./tests/data/dirty.flac')
|
zout.write('./tests/data/dirty.flac',
|
||||||
zout.write('./tests/data/dirty.docx')
|
compress_type = zipfile.ZIP_STORED)
|
||||||
zout.write('./tests/data/dirty.jpg')
|
zout.write('./tests/data/dirty.docx',
|
||||||
|
compress_type = zipfile.ZIP_DEFLATED)
|
||||||
|
zout.write('./tests/data/dirty.jpg',
|
||||||
|
compress_type = zipfile.ZIP_BZIP2)
|
||||||
|
zout.write('./tests/data/dirty.txt',
|
||||||
|
compress_type = zipfile.ZIP_LZMA)
|
||||||
p = archive.ZipParser('./tests/data/dirty.zip')
|
p = archive.ZipParser('./tests/data/dirty.zip')
|
||||||
meta = p.get_meta()
|
meta = p.get_meta()
|
||||||
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
|
self.assertEqual(meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'], 'This is a comment, be careful!')
|
||||||
@ -609,6 +630,16 @@ class TestCleaningArchives(unittest.TestCase):
|
|||||||
self.assertEqual(p.get_meta(), {})
|
self.assertEqual(p.get_meta(), {})
|
||||||
self.assertTrue(p.remove_all())
|
self.assertTrue(p.remove_all())
|
||||||
|
|
||||||
|
with zipfile.ZipFile('./tests/data/dirty.zip') as zipin:
|
||||||
|
members = {
|
||||||
|
'tests/data/dirty.flac' : zipfile.ZIP_STORED,
|
||||||
|
'tests/data/dirty.docx': zipfile.ZIP_DEFLATED,
|
||||||
|
'tests/data/dirty.jpg' : zipfile.ZIP_BZIP2,
|
||||||
|
'tests/data/dirty.txt' : zipfile.ZIP_LZMA,
|
||||||
|
}
|
||||||
|
for k, v in members.items():
|
||||||
|
self.assertEqual(zipin.getinfo(k).compress_type, v)
|
||||||
|
|
||||||
os.remove('./tests/data/dirty.zip')
|
os.remove('./tests/data/dirty.zip')
|
||||||
os.remove('./tests/data/dirty.cleaned.zip')
|
os.remove('./tests/data/dirty.cleaned.zip')
|
||||||
os.remove('./tests/data/dirty.cleaned.cleaned.zip')
|
os.remove('./tests/data/dirty.cleaned.cleaned.zip')
|
||||||
|
Loading…
Reference in New Issue
Block a user