mirror of
https://github.com/kanzure/pdfparanoia.git
synced 2024-12-04 15:05:52 +01:00
Merge pull request #23 from cathalgarvey/master
Updated terminal script to use argparse.
This commit is contained in:
commit
6abfe2a380
10
README.md
10
README.md
@ -16,6 +16,9 @@ or,
|
||||
sudo python setup.py install
|
||||
```
|
||||
|
||||
pdfparanoia is written for Python 2.7+ or Python 3.
|
||||
You will also need to manually install "pdfminer" if you do not use pip to install pdfparanoia.
|
||||
|
||||
## Usage
|
||||
|
||||
``` python
|
||||
@ -23,15 +26,14 @@ import pdfparanoia
|
||||
|
||||
pdf = pdfparanoia.scrub(open("nmat91417.pdf", "rb"))
|
||||
|
||||
file_handler = open("output.pdf", "wb")
|
||||
file_handler.write(pdf)
|
||||
file_handler.close()
|
||||
with open("output.pdf", "wb") as file_handler:
|
||||
file_handler.write(pdf)
|
||||
```
|
||||
|
||||
or from the shell,
|
||||
|
||||
``` bash
|
||||
pdfparanoia --verbose input.pdf > output.pdf
|
||||
pdfparanoia --verbose input.pdf -o output.pdf
|
||||
```
|
||||
|
||||
and,
|
||||
|
@ -10,28 +10,29 @@ stdin/piping or by referencing a file in argv[0].
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
import fileinput
|
||||
import pdfparanoia
|
||||
import argparse
|
||||
from StringIO import StringIO
|
||||
|
||||
ArgP = argparse.ArgumentParser(description="pdfparanoia is a PDF watermark removal library for academic papers. Some publishers include private information like institution names, personal names, ip addresses, timestamps and other identifying information in watermarks on each page.")
|
||||
ArgP.add_argument('in_pdf', nargs='?', type=argparse.FileType('rb'),
|
||||
default='-') # argparse.FileType interprets "-" as Stdin.
|
||||
ArgP.add_argument("-o", "--output", type=argparse.FileType('wb'),
|
||||
default=sys.stdout)
|
||||
ArgP.add_argument("-v", "--verbose", action="store_true", default=False,
|
||||
help="Output more information, which may be sensitive or excessive.")
|
||||
ArgP.add_argument("-V", "--more-verbose", action="store_true", default=False,
|
||||
help="Output even more information. Implies -v.")
|
||||
Args = ArgP.parse_args()
|
||||
|
||||
verbose = 0
|
||||
while '--verbose' in sys.argv:
|
||||
verbose += 1
|
||||
sys.argv.pop(sys.argv.index('--verbose'))
|
||||
|
||||
while '-v' in sys.argv:
|
||||
verbose += 1
|
||||
sys.argv.pop(sys.argv.index('-v'))
|
||||
|
||||
import pdfparanoia
|
||||
|
||||
# read in all lines
|
||||
content = ""
|
||||
for line in fileinput.input():
|
||||
content += line
|
||||
|
||||
# scrub the pdf to get rid of watermarks
|
||||
output = pdfparanoia.scrub(StringIO(content), verbose=verbose)
|
||||
|
||||
# dump to output
|
||||
sys.stdout.write(output)
|
||||
if Args.verbose: verbose = 1
|
||||
if Args.more_verbose: verbose = 2
|
||||
|
||||
# I really don't like having to read a file only to wrap it in StringIO, but it seems
|
||||
# necessary to make reading from stdin play nicely with pdfparanoia.
|
||||
outputcontent = pdfparanoia.scrub(StringIO(Args.in_pdf.read()), verbose=verbose)
|
||||
Args.in_pdf.close()
|
||||
Args.output.write(outputcontent)
|
||||
if Args.output != sys.stdout:
|
||||
Args.output.close()
|
||||
|
2
setup.py
2
setup.py
@ -31,7 +31,7 @@ setup(
|
||||
"License :: OSI Approved :: BSD License",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python",
|
||||
#"Programming Language :: Python :: 2.6",
|
||||
# Uses argparse (in the standard library from 2.7) and the with statement; requires 2.7+
|
||||
"Programming Language :: Python :: 2.7",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.1",
|
||||
|
Loading…
Reference in New Issue
Block a user