1
0
mirror of synced 2024-11-25 10:44:23 +01:00

In archive-based formats, the mimetype file comes first

This should improve epub compatibility,
along with other formats as a side-effect
This commit is contained in:
jvoisin 2019-02-24 23:03:17 +01:00
parent 524bae5972
commit 545dccc352
2 changed files with 14 additions and 2 deletions

View File

@ -4,13 +4,14 @@ import tempfile
import os
import logging
import shutil
from typing import Dict, Set, Pattern, Union, Any
from typing import Dict, Set, Pattern, Union, Any, List
from . import abstract, UnknownMemberPolicy, parser_factory
# Make pyflakes happy
assert Set
assert Pattern
assert List
assert Union
@ -115,9 +116,16 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
temp_folder = tempfile.mkdtemp()
abort = False
items = list() # type: List[zipfile.ZipInfo]
for item in sorted(zin.infolist(), key=lambda z: z.filename):
if item.filename == 'mimetype':
items = [item] + items
else:
items.append(item)
# Since files order is a fingerprint factor,
# we're iterating (and thus inserting) them in lexicographic order.
for item in sorted(zin.infolist(), key=lambda z: z.filename):
for item in items:
if item.filename[-1] == '/': # `is_dir` is added in Python3.6
continue # don't keep empty folders

View File

@ -83,6 +83,8 @@ class TestZipOrder(unittest.TestCase):
previous_name = ''
for item in zin.infolist():
if previous_name == '':
if item.filename == 'mimetype':
continue
previous_name = item.filename
continue
elif item.filename < previous_name:
@ -97,6 +99,8 @@ class TestZipOrder(unittest.TestCase):
previous_name = ''
for item in zin.infolist():
if previous_name == '':
if item.filename == 'mimetype':
continue
previous_name = item.filename
continue
self.assertGreaterEqual(item.filename, previous_name)