From 503b8aead5dfc899f2b2944af6b0d789f211a698 Mon Sep 17 00:00:00 2001
From: Zooko O'Whielacronx <zooko@zooko.com>
Date: Wed, 13 Feb 2013 21:08:49 +0000
Subject: [PATCH] add -v -v mode which prints out the details (potentially
 sensitive, potentially bulky)

move the spie plugin to plugins-broken/ and stop importing it, since it
appears to do nothing
---
 bin/pdfparanoia                                 |  6 +++---
 pdfparanoia/{plugins => plugins-broken}/spie.py |  0
 pdfparanoia/plugins/__init__.py                 |  1 -
 pdfparanoia/plugins/aip.py                      |  6 ++++--
 pdfparanoia/plugins/ieee.py                     |  6 ++++--
 pdfparanoia/plugins/jstor.py                    | 14 ++++++++++----
 6 files changed, 21 insertions(+), 12 deletions(-)
 rename pdfparanoia/{plugins => plugins-broken}/spie.py (100%)

diff --git a/bin/pdfparanoia b/bin/pdfparanoia
index 122e59a..749fa96 100755
--- a/bin/pdfparanoia
+++ b/bin/pdfparanoia
@@ -13,13 +13,13 @@ if __name__ == "__main__":
     import fileinput
     from StringIO import StringIO
 
-    verbose = False
+    verbose = 0
     while '--verbose' in sys.argv:
-        verbose = True
+        verbose += 1
         sys.argv.pop(sys.argv.index('--verbose'))
 
     while '-v' in sys.argv:
-        verbose = True
+        verbose += 1
         sys.argv.pop(sys.argv.index('-v'))
 
     import pdfparanoia
diff --git a/pdfparanoia/plugins/spie.py b/pdfparanoia/plugins-broken/spie.py
similarity index 100%
rename from pdfparanoia/plugins/spie.py
rename to pdfparanoia/plugins-broken/spie.py
diff --git a/pdfparanoia/plugins/__init__.py b/pdfparanoia/plugins/__init__.py
index 93a425b..10179eb 100644
--- a/pdfparanoia/plugins/__init__.py
+++ b/pdfparanoia/plugins/__init__.py
@@ -10,5 +10,4 @@ Scrubbing machines. Bubbles mandatory.
 from .aip import *
 from .ieee import *
 from .jstor import *
-from .spie import *
 
diff --git a/pdfparanoia/plugins/aip.py b/pdfparanoia/plugins/aip.py
index 685b545..d9d995d 100644
--- a/pdfparanoia/plugins/aip.py
+++ b/pdfparanoia/plugins/aip.py
@@ -18,7 +18,7 @@ class AmericanInstituteOfPhysics(Plugin):
     """
 
     @classmethod
-    def scrub(cls, content, verbose=False):
+    def scrub(cls, content, verbose=0):
         evil_ids = []
 
         # parse the pdf into a pdfminer document
@@ -46,7 +46,9 @@ class AmericanInstituteOfPhysics(Plugin):
 
                         phrase="Redistribution subject to AIP license or copyright"
                         if phrase in data:
-                            if verbose:
+                            if verbose >= 2:
+                                sys.stderr.write("%s: Found object %s with %r: %r; omitting..." % (cls.__name__, objid, phrase, data))
+                            elif verbose >= 1:
                                 sys.stderr.write("%s: Found object %s with %r; omitting..." % (cls.__name__, objid, phrase,))
 
                             evil_ids.append(objid)
diff --git a/pdfparanoia/plugins/ieee.py b/pdfparanoia/plugins/ieee.py
index 847b1d0..0a8691b 100644
--- a/pdfparanoia/plugins/ieee.py
+++ b/pdfparanoia/plugins/ieee.py
@@ -15,7 +15,7 @@ class IEEEXplore(Plugin):
     """
 
     @classmethod
-    def scrub(cls, content, verbose=False):
+    def scrub(cls, content, verbose=0):
         evil_ids = []
 
         # parse the pdf into a pdfminer document
@@ -40,7 +40,9 @@ class IEEEXplore(Plugin):
 
                     phrase= "Authorized licensed use limited to: "
                     if phrase in data:
-                        if verbose:
+                        if verbose >= 2:
+                            sys.stderr.write("%s: Found object %s with %r: %r; omitting..." % (cls.__name__, objid, phrase, data[data.index(phrase):data.index(phrase)+1000]))
+                        elif verbose >= 1:
                             sys.stderr.write("%s: Found object %s with %r; omitting..." % (cls.__name__, objid, phrase,))
 
                         evil_ids.append(objid)
diff --git a/pdfparanoia/plugins/jstor.py b/pdfparanoia/plugins/jstor.py
index d368fee..0ca971d 100644
--- a/pdfparanoia/plugins/jstor.py
+++ b/pdfparanoia/plugins/jstor.py
@@ -34,7 +34,7 @@ class JSTOR(Plugin):
     ]
 
     @classmethod
-    def scrub(cls, content, verbose=False):
+    def scrub(cls, content, verbose=0):
         replacements = []
 
         # jstor has certain watermarks only on the first page
@@ -61,13 +61,13 @@ class JSTOR(Plugin):
                     if all([requirement in data for requirement in JSTOR.requirements]):
                         better_content = data
 
-                        if verbose:
-                            sys.stderr.write("%s: Found object %s with %r; omitting..." % (cls.__name__, objid, cls.requirements))
-
                         # remove the date
                         startpos = better_content.find("This content downloaded ")
                         endpos = better_content.find(")", startpos)
                         segment = better_content[startpos:endpos]
+                        if verbose >= 2 and replacements:
+                            sys.stderr.write("%s: Found object %s with %r: %r; omitting..." % (cls.__name__, objid, cls.requirements, segment))
+
                         better_content = better_content.replace(segment, "")
 
                         # it looks like all of the watermarks are at the end?
@@ -85,12 +85,18 @@ class JSTOR(Plugin):
                             startpos = better_content.rfind("/F2 11 Tf\n")
                             endpos = better_content.find("Tf\n", startpos+5)
 
+                            if verbose >= 2 and replacements:
+                                sys.stderr.write("%s: Found object %s with %r: %r; omitting..." % (cls.__name__, objid, cls.requirements, better_content[startpos:endpos]))
+
                             better_content = better_content[0:startpos] + better_content[endpos:]
 
                         replacements.append([objid, better_content])
 
                         page_id += 1
 
+        if verbose >= 1 and replacements:
+            sys.stderr.write("%s: Found objects %s with %r; omitting..." % (cls.__name__, [deets[0] for deets in replacements], cls.requirements))
+
         for deets in replacements:
             objid = deets[0]
             replacement = deets[1]