1
0
mirror of synced 2024-11-25 18:54:22 +01:00

In archive-based formats, the mimetype file comes first

This should improve EPUB compatibility and,
as a side effect, compatibility with other archive-based formats.
This commit is contained in:
jvoisin 2019-02-24 23:03:17 +01:00
parent 524bae5972
commit 545dccc352
2 changed files with 14 additions and 2 deletions

View File

@ -4,13 +4,14 @@ import tempfile
import os import os
import logging import logging
import shutil import shutil
from typing import Dict, Set, Pattern, Union, Any from typing import Dict, Set, Pattern, Union, Any, List
from . import abstract, UnknownMemberPolicy, parser_factory from . import abstract, UnknownMemberPolicy, parser_factory
# Make pyflakes happy # Make pyflakes happy
assert Set assert Set
assert Pattern assert Pattern
assert List
assert Union assert Union
@ -115,9 +116,16 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
temp_folder = tempfile.mkdtemp() temp_folder = tempfile.mkdtemp()
abort = False abort = False
items = list() # type: List[zipfile.ZipInfo]
for item in sorted(zin.infolist(), key=lambda z: z.filename):
if item.filename == 'mimetype':
items = [item] + items
else:
items.append(item)
# Since files order is a fingerprint factor, # Since files order is a fingerprint factor,
# we're iterating (and thus inserting) them in lexicographic order. # we're iterating (and thus inserting) them in lexicographic order.
for item in sorted(zin.infolist(), key=lambda z: z.filename): for item in items:
if item.filename[-1] == '/': # `is_dir` is added in Python3.6 if item.filename[-1] == '/': # `is_dir` is added in Python3.6
continue # don't keep empty folders continue # don't keep empty folders

View File

@ -83,6 +83,8 @@ class TestZipOrder(unittest.TestCase):
previous_name = '' previous_name = ''
for item in zin.infolist(): for item in zin.infolist():
if previous_name == '': if previous_name == '':
if item.filename == 'mimetype':
continue
previous_name = item.filename previous_name = item.filename
continue continue
elif item.filename < previous_name: elif item.filename < previous_name:
@ -97,6 +99,8 @@ class TestZipOrder(unittest.TestCase):
previous_name = '' previous_name = ''
for item in zin.infolist(): for item in zin.infolist():
if previous_name == '': if previous_name == '':
if item.filename == 'mimetype':
continue
previous_name = item.filename previous_name = item.filename
continue continue
self.assertGreaterEqual(item.filename, previous_name) self.assertGreaterEqual(item.filename, previous_name)