1
0
mirror of https://github.com/kanzure/pdfparanoia.git synced 2024-12-04 23:15:52 +01:00

Merge pull request #23 from cathalgarvey/master

Updated terminal script to use argparse.
This commit is contained in:
Bryan Bishop 2013-03-24 23:12:57 -07:00
commit 6abfe2a380
3 changed files with 29 additions and 26 deletions

View File

@ -16,6 +16,9 @@ or,
sudo python setup.py install
```
pdfparanoia is written for Python 2.7+ or Python 3.
If you do not install pdfparanoia with pip, you will also need to install "pdfminer" manually.
## Usage
``` python
@ -23,15 +26,14 @@ import pdfparanoia
pdf = pdfparanoia.scrub(open("nmat91417.pdf", "rb"))
file_handler = open("output.pdf", "wb")
with open("output.pdf", "wb") as file_handler:
file_handler.write(pdf)
file_handler.close()
```
or from the shell,
``` bash
pdfparanoia --verbose input.pdf > output.pdf
pdfparanoia --verbose input.pdf -o output.pdf
```
and,

View File

@ -10,28 +10,29 @@ stdin/piping or by referencing a file in argv[1].
if __name__ == "__main__":
import sys
import fileinput
import pdfparanoia
import argparse
from StringIO import StringIO
ArgP = argparse.ArgumentParser(description="pdfparanoia is a PDF watermark removal library for academic papers. Some publishers include private information like institution names, personal names, ip addresses, timestamps and other identifying information in watermarks on each page.")
ArgP.add_argument('in_pdf', nargs='?', type=argparse.FileType('rb'),
default='-') # argparse.FileType interprets "-" as Stdin.
ArgP.add_argument("-o", "--output", type=argparse.FileType('wb'),
default=sys.stdout)
ArgP.add_argument("-v", "--verbose", action="store_true", default=False,
help="Output more information, which may be sensitive or excessive.")
ArgP.add_argument("-V", "--more-verbose", action="store_true", default=False,
help="Output even more information. Implies -v.")
Args = ArgP.parse_args()
verbose = 0
while '--verbose' in sys.argv:
verbose += 1
sys.argv.pop(sys.argv.index('--verbose'))
while '-v' in sys.argv:
verbose += 1
sys.argv.pop(sys.argv.index('-v'))
import pdfparanoia
# read in all lines
content = ""
for line in fileinput.input():
content += line
# scrub the pdf to get rid of watermarks
output = pdfparanoia.scrub(StringIO(content), verbose=verbose)
# dump to output
sys.stdout.write(output)
if Args.verbose: verbose = 1
if Args.more_verbose: verbose = 2
# I really don't like having to read the whole file only to wrap it in a StringIO, but it
# seems necessary to get reading from stdin to play nicely with pdfparanoia.
outputcontent = pdfparanoia.scrub(StringIO(Args.in_pdf.read()), verbose=verbose)
Args.in_pdf.close()
Args.output.write(outputcontent)
if Args.output != sys.stdout:
Args.output.close()

View File

@ -31,7 +31,7 @@ setup(
"License :: OSI Approved :: BSD License",
"Operating System :: OS Independent",
"Programming Language :: Python",
#"Programming Language :: Python :: 2.6",
# Uses argparse (standard library since 2.7) and the with statement; requires 2.7+
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.1",