Refactor lightweight mode implementation
This commit is contained in:
parent
6ce88b8b7f
commit
b832a59414
@ -19,6 +19,7 @@ class AbstractParser(abc.ABC):
|
||||
self.filename = filename
|
||||
fname, extension = os.path.splitext(filename)
|
||||
self.output_filename = fname + '.cleaned' + extension
|
||||
self.lightweight_cleaning = False
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_meta(self) -> Dict[str, str]:
|
||||
@ -27,10 +28,3 @@ class AbstractParser(abc.ABC):
|
||||
@abc.abstractmethod
|
||||
def remove_all(self) -> bool:
|
||||
pass # pragma: no cover
|
||||
|
||||
def remove_all_lightweight(self) -> bool:
|
||||
""" This method removes _SOME_ metadata.
|
||||
It might be useful to implement it for file formats that do
|
||||
not support non-destructive cleaning.
|
||||
"""
|
||||
return self.remove_all()
|
||||
|
@ -37,7 +37,12 @@ class PDFParser(abstract.AbstractParser):
|
||||
except GLib.GError: # Invalid PDF
|
||||
raise ValueError
|
||||
|
||||
def remove_all_lightweight(self):
|
||||
def remove_all(self) -> bool:
|
||||
if self.lightweight_cleaning is True:
|
||||
return self.__remove_all_lightweight()
|
||||
return self.__remove_all_thorough()
|
||||
|
||||
def __remove_all_lightweight(self) -> bool:
|
||||
"""
|
||||
Load the document into Poppler, render pages on a new PDFSurface.
|
||||
"""
|
||||
@ -64,7 +69,7 @@ class PDFParser(abstract.AbstractParser):
|
||||
|
||||
return True
|
||||
|
||||
def remove_all(self):
|
||||
def __remove_all_thorough(self) -> bool:
|
||||
"""
|
||||
Load the document into Poppler, render pages on PNG,
|
||||
and shove those PNGs into a new PDF.
|
||||
|
3
mat2
3
mat2
@ -94,8 +94,7 @@ def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy)
|
||||
print("[-] %s's format (%s) is not supported" % (filename, mtype))
|
||||
return False
|
||||
p.unknown_member_policy = policy
|
||||
if is_lightweight:
|
||||
return p.remove_all_lightweight()
|
||||
p.lightweight_cleaning = is_lightweight
|
||||
return p.remove_all()
|
||||
|
||||
|
||||
|
@ -190,7 +190,8 @@ class TestLightWeightCleaning(unittest.TestCase):
|
||||
meta = p.get_meta()
|
||||
self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
|
||||
|
||||
ret = p.remove_all_lightweight()
|
||||
p.lightweight_cleaning = True
|
||||
ret = p.remove_all()
|
||||
self.assertTrue(ret)
|
||||
|
||||
p = pdf.PDFParser('./tests/data/clean.cleaned.pdf')
|
||||
@ -207,7 +208,8 @@ class TestLightWeightCleaning(unittest.TestCase):
|
||||
meta = p.get_meta()
|
||||
self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
|
||||
|
||||
ret = p.remove_all_lightweight()
|
||||
p.lightweight_cleaning = True
|
||||
ret = p.remove_all()
|
||||
self.assertTrue(ret)
|
||||
|
||||
p = images.PNGParser('./tests/data/clean.cleaned.png')
|
||||
|
Loading…
Reference in New Issue
Block a user