From f78aad78ef3df7e76d989675b45a7d4e1a9314b7 Mon Sep 17 00:00:00 2001 From: Bryan Bishop Date: Tue, 5 Feb 2013 17:20:11 -0600 Subject: [PATCH] AIP: better false-positives check --- pdfparanoia/plugins/aip.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pdfparanoia/plugins/aip.py b/pdfparanoia/plugins/aip.py index 0ae3ec1..20457f2 100644 --- a/pdfparanoia/plugins/aip.py +++ b/pdfparanoia/plugins/aip.py @@ -35,12 +35,15 @@ class AmericanInstituteOfPhysics(Plugin): if hasattr(obj, "attrs"): # watermarks tend to be in FlateDecode elements if obj.attrs.has_key("Filter") and str(obj.attrs["Filter"]) == "/FlateDecode": - #length = obj.attrs["Length"] - #rawdata = copy(obj.rawdata) - data = copy(obj.get_data()) + length = obj.attrs["Length"] - if "Redistribution subject to AIP license or copyright" in data: - evil_ids.append(objid) + # the watermark is never very long + if length < 1000: + #rawdata = copy(obj.rawdata) + data = copy(obj.get_data()) + + if "Redistribution subject to AIP license or copyright" in data: + evil_ids.append(objid) for objid in evil_ids: content = remove_object_by_id(content, objid)