Refactor lightweight mode implementation
This commit is contained in:
parent
6ce88b8b7f
commit
b832a59414
@ -19,6 +19,7 @@ class AbstractParser(abc.ABC):
|
|||||||
self.filename = filename
|
self.filename = filename
|
||||||
fname, extension = os.path.splitext(filename)
|
fname, extension = os.path.splitext(filename)
|
||||||
self.output_filename = fname + '.cleaned' + extension
|
self.output_filename = fname + '.cleaned' + extension
|
||||||
|
self.lightweight_cleaning = False
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def get_meta(self) -> Dict[str, str]:
|
def get_meta(self) -> Dict[str, str]:
|
||||||
@ -27,10 +28,3 @@ class AbstractParser(abc.ABC):
|
|||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def remove_all(self) -> bool:
|
def remove_all(self) -> bool:
|
||||||
pass # pragma: no cover
|
pass # pragma: no cover
|
||||||
|
|
||||||
def remove_all_lightweight(self) -> bool:
|
|
||||||
""" This method removes _SOME_ metadata.
|
|
||||||
It might be useful to implement it for file formats that do
|
|
||||||
not support non-destructive cleaning.
|
|
||||||
"""
|
|
||||||
return self.remove_all()
|
|
||||||
|
@ -37,7 +37,12 @@ class PDFParser(abstract.AbstractParser):
|
|||||||
except GLib.GError: # Invalid PDF
|
except GLib.GError: # Invalid PDF
|
||||||
raise ValueError
|
raise ValueError
|
||||||
|
|
||||||
def remove_all_lightweight(self):
|
def remove_all(self) -> bool:
|
||||||
|
if self.lightweight_cleaning is True:
|
||||||
|
return self.__remove_all_lightweight()
|
||||||
|
return self.__remove_all_thorough()
|
||||||
|
|
||||||
|
def __remove_all_lightweight(self) -> bool:
|
||||||
"""
|
"""
|
||||||
Load the document into Poppler, render pages on a new PDFSurface.
|
Load the document into Poppler, render pages on a new PDFSurface.
|
||||||
"""
|
"""
|
||||||
@ -64,7 +69,7 @@ class PDFParser(abstract.AbstractParser):
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def remove_all(self):
|
def __remove_all_thorough(self) -> bool:
|
||||||
"""
|
"""
|
||||||
Load the document into Poppler, render pages on PNG,
|
Load the document into Poppler, render pages on PNG,
|
||||||
and shove those PNGs into a new PDF.
|
and shove those PNGs into a new PDF.
|
||||||
|
3
mat2
3
mat2
@ -94,8 +94,7 @@ def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy)
|
|||||||
print("[-] %s's format (%s) is not supported" % (filename, mtype))
|
print("[-] %s's format (%s) is not supported" % (filename, mtype))
|
||||||
return False
|
return False
|
||||||
p.unknown_member_policy = policy
|
p.unknown_member_policy = policy
|
||||||
if is_lightweight:
|
p.lightweight_cleaning = is_lightweight
|
||||||
return p.remove_all_lightweight()
|
|
||||||
return p.remove_all()
|
return p.remove_all()
|
||||||
|
|
||||||
|
|
||||||
|
@ -190,7 +190,8 @@ class TestLightWeightCleaning(unittest.TestCase):
|
|||||||
meta = p.get_meta()
|
meta = p.get_meta()
|
||||||
self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
|
self.assertEqual(meta['producer'], 'pdfTeX-1.40.14')
|
||||||
|
|
||||||
ret = p.remove_all_lightweight()
|
p.lightweight_cleaning = True
|
||||||
|
ret = p.remove_all()
|
||||||
self.assertTrue(ret)
|
self.assertTrue(ret)
|
||||||
|
|
||||||
p = pdf.PDFParser('./tests/data/clean.cleaned.pdf')
|
p = pdf.PDFParser('./tests/data/clean.cleaned.pdf')
|
||||||
@ -207,7 +208,8 @@ class TestLightWeightCleaning(unittest.TestCase):
|
|||||||
meta = p.get_meta()
|
meta = p.get_meta()
|
||||||
self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
|
self.assertEqual(meta['Comment'], 'This is a comment, be careful!')
|
||||||
|
|
||||||
ret = p.remove_all_lightweight()
|
p.lightweight_cleaning = True
|
||||||
|
ret = p.remove_all()
|
||||||
self.assertTrue(ret)
|
self.assertTrue(ret)
|
||||||
|
|
||||||
p = images.PNGParser('./tests/data/clean.cleaned.png')
|
p = images.PNGParser('./tests/data/clean.cleaned.png')
|
||||||
|
Loading…
Reference in New Issue
Block a user