From 545dccc3527fcdf851b30b072ae6c7222b711777 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sun, 24 Feb 2019 23:03:17 +0100 Subject: [PATCH] In archive-based formats, the `mimetype` file comes first This should improve epub compatibility, along with other formats as a side-effect --- libmat2/archive.py | 12 ++++++++++-- tests/test_deep_cleaning.py | 4 ++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/libmat2/archive.py b/libmat2/archive.py index d155664..1ae3b45 100644 --- a/libmat2/archive.py +++ b/libmat2/archive.py @@ -4,13 +4,14 @@ import tempfile import os import logging import shutil -from typing import Dict, Set, Pattern, Union, Any +from typing import Dict, Set, Pattern, Union, Any, List from . import abstract, UnknownMemberPolicy, parser_factory # Make pyflakes happy assert Set assert Pattern +assert List assert Union @@ -115,9 +116,16 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser): temp_folder = tempfile.mkdtemp() abort = False + items = list() # type: List[zipfile.ZipInfo] + for item in sorted(zin.infolist(), key=lambda z: z.filename): + if item.filename == 'mimetype': + items = [item] + items + else: + items.append(item) + # Since files order is a fingerprint factor, # we're iterating (and thus inserting) them in lexicographic order. - for item in sorted(zin.infolist(), key=lambda z: z.filename): + for item in items: if item.filename[-1] == '/': # `is_dir` is added in Python3.6 continue # don't keep empty folders diff --git a/tests/test_deep_cleaning.py b/tests/test_deep_cleaning.py index 8466127..ccd4955 100644 --- a/tests/test_deep_cleaning.py +++ b/tests/test_deep_cleaning.py @@ -83,6 +83,8 @@ class TestZipOrder(unittest.TestCase): previous_name = '' for item in zin.infolist(): if previous_name == '': + if item.filename == 'mimetype': + continue previous_name = item.filename continue elif item.filename < previous_name: @@ -97,6 +99,8 @@ class TestZipOrder(unittest.TestCase): previous_name = '' for item in zin.infolist(): if previous_name == '': + if item.filename == 'mimetype': + continue previous_name = item.filename continue self.assertGreaterEqual(item.filename, previous_name)