Make pylint even happier
This commit is contained in:
parent
86fe3aa584
commit
080d6769ca
@ -20,18 +20,18 @@ assert Pattern
|
|||||||
def _parse_xml(full_path: str):
|
def _parse_xml(full_path: str):
|
||||||
""" This function parses XML with namespace support. """
|
""" This function parses XML with namespace support. """
|
||||||
def parse_map(f): # etree support for ns is a bit rough
|
def parse_map(f): # etree support for ns is a bit rough
|
||||||
ns_map = dict()
|
namespace_map = dict()
|
||||||
for _, (k, v) in ET.iterparse(f, ("start-ns", )):
|
for _, (key, value) in ET.iterparse(f, ("start-ns", )):
|
||||||
ns_map[k] = v
|
namespace_map[key] = value
|
||||||
return ns_map
|
return namespace_map
|
||||||
|
|
||||||
ns = parse_map(full_path)
|
namespace_map = parse_map(full_path)
|
||||||
|
|
||||||
# Register the namespaces
|
# Register the namespaces
|
||||||
for k, v in ns.items():
|
for key, value in namespace_map.items():
|
||||||
ET.register_namespace(k, v)
|
ET.register_namespace(key, value)
|
||||||
|
|
||||||
return ET.parse(full_path), ns
|
return ET.parse(full_path), namespace_map
|
||||||
|
|
||||||
|
|
||||||
class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
||||||
@ -53,15 +53,18 @@ class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
|||||||
def _specific_cleanup(self, full_path: str) -> bool:
|
def _specific_cleanup(self, full_path: str) -> bool:
|
||||||
""" This method can be used to apply specific treatment
|
""" This method can be used to apply specific treatment
|
||||||
to files present in the archive."""
|
to files present in the archive."""
|
||||||
|
# pylint: disable=unused-argument,no-self-use
|
||||||
return True # pragma: no cover
|
return True # pragma: no cover
|
||||||
|
|
||||||
def _clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
|
@staticmethod
|
||||||
|
def _clean_zipinfo(zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
|
||||||
zipinfo.create_system = 3 # Linux
|
zipinfo.create_system = 3 # Linux
|
||||||
zipinfo.comment = b''
|
zipinfo.comment = b''
|
||||||
zipinfo.date_time = (1980, 1, 1, 0, 0, 0)
|
zipinfo.date_time = (1980, 1, 1, 0, 0, 0)
|
||||||
return zipinfo
|
return zipinfo
|
||||||
|
|
||||||
def _get_zipinfo_meta(self, zipinfo: zipfile.ZipInfo) -> Dict[str, str]:
|
@staticmethod
|
||||||
|
def _get_zipinfo_meta(zipinfo: zipfile.ZipInfo) -> Dict[str, str]:
|
||||||
metadata = {}
|
metadata = {}
|
||||||
if zipinfo.create_system == 3:
|
if zipinfo.create_system == 3:
|
||||||
#metadata['create_system'] = 'Linux'
|
#metadata['create_system'] = 'Linux'
|
||||||
@ -142,29 +145,30 @@ class MSOfficeParser(ArchiveBasedAbstractParser):
|
|||||||
'^docProps/',
|
'^docProps/',
|
||||||
}))
|
}))
|
||||||
|
|
||||||
def __remove_revisions(self, full_path: str) -> bool:
|
@staticmethod
|
||||||
|
def __remove_revisions(full_path: str) -> bool:
|
||||||
""" In this function, we're changing the XML
|
""" In this function, we're changing the XML
|
||||||
document in two passes, since we don't want
|
document in two passes, since we don't want
|
||||||
to change the tree we're iterating on."""
|
to change the tree we're iterating on."""
|
||||||
try:
|
try:
|
||||||
tree, ns = _parse_xml(full_path)
|
tree, namespace = _parse_xml(full_path)
|
||||||
except ET.ParseError:
|
except ET.ParseError:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# No revisions are present
|
# No revisions are present
|
||||||
del_presence = tree.find('.//w:del', ns)
|
del_presence = tree.find('.//w:del', namespace)
|
||||||
ins_presence = tree.find('.//w:ins', ns)
|
ins_presence = tree.find('.//w:ins', namespace)
|
||||||
if del_presence is None and ins_presence is None:
|
if del_presence is None and ins_presence is None:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
parent_map = {c:p for p in tree.iter() for c in p}
|
parent_map = {c:p for p in tree.iter() for c in p}
|
||||||
|
|
||||||
elements = list([element for element in tree.iterfind('.//w:del', ns)])
|
elements = list([element for element in tree.iterfind('.//w:del', namespace)])
|
||||||
for element in elements:
|
for element in elements:
|
||||||
parent_map[element].remove(element)
|
parent_map[element].remove(element)
|
||||||
|
|
||||||
elements = list()
|
elements = list()
|
||||||
for element in tree.iterfind('.//w:ins', ns):
|
for element in tree.iterfind('.//w:ins', namespace):
|
||||||
for position, item in enumerate(tree.iter()): #pragma: no cover
|
for position, item in enumerate(tree.iter()): #pragma: no cover
|
||||||
if item == element:
|
if item == element:
|
||||||
for children in element.iterfind('./*'):
|
for children in element.iterfind('./*'):
|
||||||
@ -231,17 +235,18 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
|
|||||||
}))
|
}))
|
||||||
|
|
||||||
|
|
||||||
def __remove_revisions(self, full_path: str) -> bool:
|
@staticmethod
|
||||||
|
def __remove_revisions(full_path: str) -> bool:
|
||||||
try:
|
try:
|
||||||
tree, ns = _parse_xml(full_path)
|
tree, namespace = _parse_xml(full_path)
|
||||||
except ET.ParseError:
|
except ET.ParseError:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if 'office' not in ns.keys(): # no revisions in the current file
|
if 'office' not in namespace.keys(): # no revisions in the current file
|
||||||
return True
|
return True
|
||||||
|
|
||||||
for text in tree.getroot().iterfind('.//office:text', ns):
|
for text in tree.getroot().iterfind('.//office:text', namespace):
|
||||||
for changes in text.iterfind('.//text:tracked-changes', ns):
|
for changes in text.iterfind('.//text:tracked-changes', namespace):
|
||||||
text.remove(changes)
|
text.remove(changes)
|
||||||
|
|
||||||
tree.write(full_path, xml_declaration=True)
|
tree.write(full_path, xml_declaration=True)
|
||||||
|
Loading…
Reference in New Issue
Block a user