1
0
mirror of https://github.com/kanzure/pdfparanoia.git synced 2024-12-04 23:15:52 +01:00

Modified the "pdfparanoia" script in bin/ so that it uses argparse and the "with" statement (context managers).

As Python 2.6 was already commented out as a potential environment, there seemed little
reason not to use argparse rather than a sys.argv-popping system; argparse offers
automatically generated usage documentation and can give useful errors when input
is incorrect.

The "with" context statement is also highly excellent and should be used wherever
legacy support for old-timers using 2.6 is not needed.
This commit is contained in:
Cathal Garvey 2013-03-21 23:37:34 +00:00
parent 0d1da12f71
commit 95e92420c9
3 changed files with 27 additions and 26 deletions

View File

@ -16,6 +16,9 @@ or,
sudo python setup.py install sudo python setup.py install
``` ```
pdfparanoia is written for python2.7+ or python 3.
You will also need to manually install "pdfminer" if you do not use pip to install pdfparanoia.
## Usage ## Usage
``` python ``` python
@ -23,15 +26,14 @@ import pdfparanoia
pdf = pdfparanoia.scrub(open("nmat91417.pdf", "rb")) pdf = pdfparanoia.scrub(open("nmat91417.pdf", "rb"))
file_handler = open("output.pdf", "wb") with open("output.pdf", "wb") as file_handler:
file_handler.write(pdf) file_handler.write(pdf)
file_handler.close()
``` ```
or from the shell, or from the shell,
``` bash ``` bash
pdfparanoia --verbose input.pdf > output.pdf pdfparanoia --verbose input.pdf -o output.pdf
``` ```
and, and,

View File

@ -10,28 +10,27 @@ stdin/piping or by referencing a file in argv[0].
if __name__ == "__main__": if __name__ == "__main__":
import sys import sys
import fileinput import pdfparanoia
import argparse
from StringIO import StringIO from StringIO import StringIO
ArgP = argparse.ArgumentParser(description="pdfparanoia is a PDF watermark removal library for academic papers. Some publishers include private information like institution names, personal names, ip addresses, timestamps and other identifying information in watermarks on each page.")
ArgP.add_argument('in_pdf', nargs='?', type=argparse.FileType('rb'),
default=sys.stdin)
ArgP.add_argument("-o", "--output", type=argparse.FileType('wb'),
default=sys.stdout)
ArgP.add_argument("-v", "--verbose", action="store_true", default=False,
help="Output more information, which may be sensitive or excessive.")
ArgP.add_argument("-V", "--more-verbose", action="store_true", default=False,
help="Output even more information. Implies -v.")
Args = ArgP.parse_args()
verbose = 0 verbose = 0
while '--verbose' in sys.argv: if Args.verbose: verbose = 1
verbose += 1 if Args.more_verbose: verbose = 2
sys.argv.pop(sys.argv.index('--verbose'))
while '-v' in sys.argv:
verbose += 1
sys.argv.pop(sys.argv.index('-v'))
import pdfparanoia
# read in all lines
content = ""
for line in fileinput.input():
content += line
# scrub the pdf to get rid of watermarks
output = pdfparanoia.scrub(StringIO(content), verbose=verbose)
# dump to output
sys.stdout.write(output)
outputcontent = pdfparanoia.scrub(Args.in_pdf, verbose=verbose)
Args.in_pdf.close()
Args.output.write(outputcontent)
if Args.output != sys.stdout:
Args.output.close()

View File

@ -31,7 +31,7 @@ setup(
"License :: OSI Approved :: BSD License", "License :: OSI Approved :: BSD License",
"Operating System :: OS Independent", "Operating System :: OS Independent",
"Programming Language :: Python", "Programming Language :: Python",
#"Programming Language :: Python :: 2.6", # Uses argparse and with statement; 2.7+
"Programming Language :: Python :: 2.7", "Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.1", "Programming Language :: Python :: 3.1",