Merge pull request #23 from cathalgarvey/master

Updated terminal script to use argparse.
2025-07-15 14:08:21 +02:00 · 2013-03-24 23:12:57 -07:00 · 2013-03-24 23:12:57 -07:00 · 6abfe2a380
commit 6abfe2a380
parent 0d1da12f71 db514ff744
3 changed files with 29 additions and 26 deletions
--- a/README.md
+++ b/README.md
@@ -16,6 +16,9 @@ or,
 sudo python setup.py install
 ```

+pdfparanoia is written for Python 2.7+ or Python 3.
+You will also need to manually install "pdfminer" if you do not use pip to install pdfparanoia.
+
 ## Usage

 ``` python
@@ -23,15 +26,14 @@ import pdfparanoia

 pdf = pdfparanoia.scrub(open("nmat91417.pdf", "rb"))

-file_handler = open("output.pdf", "wb")
-file_handler.write(pdf)
-file_handler.close()
+with open("output.pdf", "wb") as file_handler:
+    file_handler.write(pdf)
 ```

 or from the shell,

 ``` bash
-pdfparanoia --verbose input.pdf > output.pdf
+pdfparanoia --verbose input.pdf -o output.pdf
 ```

 and,
--- a/bin/pdfparanoia
+++ b/bin/pdfparanoia
@@ -10,28 +10,29 @@ stdin/piping or by referencing a file in argv[0].

 if __name__ == "__main__":
    import sys
-    import fileinput
+    import pdfparanoia
+    import argparse
    from StringIO import StringIO

+    ArgP = argparse.ArgumentParser(description="pdfparanoia is a PDF watermark removal library for academic papers. Some publishers include private information like institution names, personal names, ip addresses, timestamps and other identifying information in watermarks on each page.")
+    ArgP.add_argument('in_pdf', nargs='?', type=argparse.FileType('rb'),
+                        default='-') # argparse.FileType interprets "-" as Stdin.
+    ArgP.add_argument("-o", "--output", type=argparse.FileType('wb'),
+                        default=sys.stdout)
+    ArgP.add_argument("-v", "--verbose", action="store_true", default=False,
+                      help="Output more information, which may be sensitive or excessive.")
+    ArgP.add_argument("-V", "--more-verbose", action="store_true", default=False,
+                      help="Output even more information. Implies -v.")
+    Args = ArgP.parse_args()
+
    verbose = 0
-    while '--verbose' in sys.argv:
-        verbose += 1
-        sys.argv.pop(sys.argv.index('--verbose'))
-
-    while '-v' in sys.argv:
-        verbose += 1
-        sys.argv.pop(sys.argv.index('-v'))
-
-    import pdfparanoia
-
-    # read in all lines
-    content = ""
-    for line in fileinput.input():
-        content += line
-
-    # scrub the pdf to get rid of watermarks
-    output = pdfparanoia.scrub(StringIO(content), verbose=verbose)
-
-    # dump to output
-    sys.stdout.write(output)
+    if Args.verbose: verbose = 1
+    if Args.more_verbose: verbose = 2

+    # I really don't like having to read the whole file only to wrap it in StringIO, but it
+    # seems necessary to get reading from stdin to play nicely with pdfparanoia.
+    outputcontent = pdfparanoia.scrub(StringIO(Args.in_pdf.read()), verbose=verbose)
+    Args.in_pdf.close()
+    Args.output.write(outputcontent)
+    if Args.output != sys.stdout:
+        Args.output.close()
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,7 @@ setup(
        "License :: OSI Approved :: BSD License",
        "Operating System :: OS Independent",
        "Programming Language :: Python",
-        #"Programming Language :: Python :: 2.6",
+        # Requires 2.7+: uses argparse (new in 2.7) and the with statement
        "Programming Language :: Python :: 2.7",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.1",