From b832a5941458083dd6147efb652036552f95b786 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 12 Oct 2018 11:49:24 +0200 Subject: [PATCH] Refactor lightweight mode implementation --- libmat2/abstract.py | 8 +------- libmat2/pdf.py | 9 +++++++-- mat2 | 3 +-- tests/test_libmat2.py | 6 ++++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/libmat2/abstract.py b/libmat2/abstract.py index cd72f2c..5bcaa69 100644 --- a/libmat2/abstract.py +++ b/libmat2/abstract.py @@ -19,6 +19,7 @@ class AbstractParser(abc.ABC): self.filename = filename fname, extension = os.path.splitext(filename) self.output_filename = fname + '.cleaned' + extension + self.lightweight_cleaning = False @abc.abstractmethod def get_meta(self) -> Dict[str, str]: @@ -27,10 +28,3 @@ class AbstractParser(abc.ABC): @abc.abstractmethod def remove_all(self) -> bool: pass # pragma: no cover - - def remove_all_lightweight(self) -> bool: - """ This method removes _SOME_ metadata. - It might be useful to implement it for fileformats that do - not support non-destructive cleaning. - """ - return self.remove_all() diff --git a/libmat2/pdf.py b/libmat2/pdf.py index c8769aa..140b4f4 100644 --- a/libmat2/pdf.py +++ b/libmat2/pdf.py @@ -37,7 +37,12 @@ class PDFParser(abstract.AbstractParser): except GLib.GError: # Invalid PDF raise ValueError - def remove_all_lightweight(self): + def remove_all(self) -> bool: + if self.lightweight_cleaning is True: + return self.__remove_all_lightweight() + return self.__remove_all_thorough() + + def __remove_all_lightweight(self) -> bool: """ Load the document into Poppler, render pages on a new PDFSurface. """ @@ -64,7 +69,7 @@ class PDFParser(abstract.AbstractParser): return True - def remove_all(self): + def __remove_all_thorough(self) -> bool: """ Load the document into Poppler, render pages on PNG, and shove those PNG into a new PDF. diff --git a/mat2 b/mat2 index b4a6033..ba1f0ac 100755 --- a/mat2 +++ b/mat2 @@ -94,8 +94,7 @@ def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy) print("[-] %s's format (%s) is not supported" % (filename, mtype)) return False p.unknown_member_policy = policy - if is_lightweight: - return p.remove_all_lightweight() + p.lightweight_cleaning = is_lightweight return p.remove_all() diff --git a/tests/test_libmat2.py b/tests/test_libmat2.py index 6a2af91..665bab0 100644 --- a/tests/test_libmat2.py +++ b/tests/test_libmat2.py @@ -190,7 +190,8 @@ class TestLightWeightCleaning(unittest.TestCase): meta = p.get_meta() self.assertEqual(meta['producer'], 'pdfTeX-1.40.14') - ret = p.remove_all_lightweight() + p.lightweight_cleaning = True + ret = p.remove_all() self.assertTrue(ret) p = pdf.PDFParser('./tests/data/clean.cleaned.pdf') @@ -207,7 +208,8 @@ class TestLightWeightCleaning(unittest.TestCase): meta = p.get_meta() self.assertEqual(meta['Comment'], 'This is a comment, be careful!') - ret = p.remove_all_lightweight() + p.lightweight_cleaning = True + ret = p.remove_all() self.assertTrue(ret) p = images.PNGParser('./tests/data/clean.cleaned.png')