1
0
Mirror of the upstream repository, last synced 2024-11-22 09:14:23 +01:00

Refactor lightweight mode implementation

This commit is contained in:
jvoisin 2018-10-12 11:49:24 +02:00
parent 6ce88b8b7f
commit b832a59414
4 changed files with 13 additions and 13 deletions

View File

@@ -19,6 +19,7 @@ class AbstractParser(abc.ABC):
self.filename = filename self.filename = filename
fname, extension = os.path.splitext(filename) fname, extension = os.path.splitext(filename)
self.output_filename = fname + '.cleaned' + extension self.output_filename = fname + '.cleaned' + extension
self.lightweight_cleaning = False
@abc.abstractmethod @abc.abstractmethod
def get_meta(self) -> Dict[str, str]: def get_meta(self) -> Dict[str, str]:
@@ -27,10 +28,3 @@ class AbstractParser(abc.ABC):
@abc.abstractmethod @abc.abstractmethod
def remove_all(self) -> bool: def remove_all(self) -> bool:
pass # pragma: no cover pass # pragma: no cover
def remove_all_lightweight(self) -> bool:
""" This method removes _SOME_ metadata.
It might be useful to implement it for fileformats that do
not support non-destructive cleaning.
"""
return self.remove_all()

View File

@@ -37,7 +37,12 @@ class PDFParser(abstract.AbstractParser):
except GLib.GError: # Invalid PDF except GLib.GError: # Invalid PDF
raise ValueError raise ValueError
def remove_all_lightweight(self): def remove_all(self) -> bool:
if self.lightweight_cleaning is True:
return self.__remove_all_lightweight()
return self.__remove_all_thorough()
def __remove_all_lightweight(self) -> bool:
""" """
Load the document into Poppler, render pages on a new PDFSurface. Load the document into Poppler, render pages on a new PDFSurface.
""" """
@@ -64,7 +69,7 @@ class PDFParser(abstract.AbstractParser):
return True return True
def remove_all(self): def __remove_all_thorough(self) -> bool:
""" """
Load the document into Poppler, render pages on PNG, Load the document into Poppler, render pages on PNG,
and shove those PNG into a new PDF. and shove those PNG into a new PDF.

3
mat2
View File

@@ -94,8 +94,7 @@ def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy)
print("[-] %s's format (%s) is not supported" % (filename, mtype)) print("[-] %s's format (%s) is not supported" % (filename, mtype))
return False return False
p.unknown_member_policy = policy p.unknown_member_policy = policy
if is_lightweight: p.lightweight_cleaning = is_lightweight
return p.remove_all_lightweight()
return p.remove_all() return p.remove_all()

View File

@@ -190,7 +190,8 @@ class TestLightWeightCleaning(unittest.TestCase):
meta = p.get_meta() meta = p.get_meta()
self.assertEqual(meta['producer'], 'pdfTeX-1.40.14') self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
ret = p.remove_all_lightweight() p.lightweight_cleaning = True
ret = p.remove_all()
self.assertTrue(ret) self.assertTrue(ret)
p = pdf.PDFParser('./tests/data/clean.cleaned.pdf') p = pdf.PDFParser('./tests/data/clean.cleaned.pdf')
@@ -207,7 +208,8 @@ class TestLightWeightCleaning(unittest.TestCase):
meta = p.get_meta() meta = p.get_meta()
self.assertEqual(meta['Comment'], 'This is a comment, be careful!') self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
ret = p.remove_all_lightweight() p.lightweight_cleaning = True
ret = p.remove_all()
self.assertTrue(ret) self.assertTrue(ret)
p = images.PNGParser('./tests/data/clean.cleaned.png') p = images.PNGParser('./tests/data/clean.cleaned.png')