mirror of
https://github.com/kanzure/pdfparanoia.git
synced 2024-12-04 23:15:52 +01:00
Merge pull request #23 from cathalgarvey/master
Updated terminal script to use argparse.
This commit is contained in:
commit
6abfe2a380
10
README.md
10
README.md
@ -16,6 +16,9 @@ or,
|
|||||||
sudo python setup.py install
|
sudo python setup.py install
|
||||||
```
|
```
|
||||||
|
|
||||||
|
pdfparanoia is written for Python 2.7+ or Python 3.
|
||||||
|
You will also need to manually install "pdfminer" if you do not use pip to install pdfparanoia.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
``` python
|
``` python
|
||||||
@ -23,15 +26,14 @@ import pdfparanoia
|
|||||||
|
|
||||||
pdf = pdfparanoia.scrub(open("nmat91417.pdf", "rb"))
|
pdf = pdfparanoia.scrub(open("nmat91417.pdf", "rb"))
|
||||||
|
|
||||||
file_handler = open("output.pdf", "wb")
|
with open("output.pdf", "wb") as file_handler:
|
||||||
file_handler.write(pdf)
|
file_handler.write(pdf)
|
||||||
file_handler.close()
|
|
||||||
```
|
```
|
||||||
|
|
||||||
or from the shell,
|
or from the shell,
|
||||||
|
|
||||||
``` bash
|
``` bash
|
||||||
pdfparanoia --verbose input.pdf > output.pdf
|
pdfparanoia --verbose input.pdf -o output.pdf
|
||||||
```
|
```
|
||||||
|
|
||||||
and,
|
and,
|
||||||
|
@ -10,28 +10,29 @@ stdin/piping or by referencing a file in argv[0].
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import sys
|
import sys
|
||||||
import fileinput
|
import pdfparanoia
|
||||||
|
import argparse
|
||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
|
|
||||||
|
ArgP = argparse.ArgumentParser(description="pdfparanoia is a PDF watermark removal library for academic papers. Some publishers include private information like institution names, personal names, ip addresses, timestamps and other identifying information in watermarks on each page.")
|
||||||
|
ArgP.add_argument('in_pdf', nargs='?', type=argparse.FileType('rb'),
|
||||||
|
default='-') # argparse.FileType interprets "-" as Stdin.
|
||||||
|
ArgP.add_argument("-o", "--output", type=argparse.FileType('wb'),
|
||||||
|
default=sys.stdout)
|
||||||
|
ArgP.add_argument("-v", "--verbose", action="store_true", default=False,
|
||||||
|
help="Output more information, which may be sensitive or excessive.")
|
||||||
|
ArgP.add_argument("-V", "--more-verbose", action="store_true", default=False,
|
||||||
|
help="Output even more information. Implies -v.")
|
||||||
|
Args = ArgP.parse_args()
|
||||||
|
|
||||||
verbose = 0
|
verbose = 0
|
||||||
while '--verbose' in sys.argv:
|
if Args.verbose: verbose = 1
|
||||||
verbose += 1
|
if Args.more_verbose: verbose = 2
|
||||||
sys.argv.pop(sys.argv.index('--verbose'))
|
|
||||||
|
|
||||||
while '-v' in sys.argv:
|
|
||||||
verbose += 1
|
|
||||||
sys.argv.pop(sys.argv.index('-v'))
|
|
||||||
|
|
||||||
import pdfparanoia
|
|
||||||
|
|
||||||
# read in all lines
|
|
||||||
content = ""
|
|
||||||
for line in fileinput.input():
|
|
||||||
content += line
|
|
||||||
|
|
||||||
# scrub the pdf to get rid of watermarks
|
|
||||||
output = pdfparanoia.scrub(StringIO(content), verbose=verbose)
|
|
||||||
|
|
||||||
# dump to output
|
|
||||||
sys.stdout.write(output)
|
|
||||||
|
|
||||||
|
# I really don't like having to read the whole file only to wrap it in a StringIO, but it seems
|
||||||
|
# necessary to get reading from StdIn to play nicely with pdfparanoia.
|
||||||
|
outputcontent = pdfparanoia.scrub(StringIO(Args.in_pdf.read()), verbose=verbose)
|
||||||
|
Args.in_pdf.close()
|
||||||
|
Args.output.write(outputcontent)
|
||||||
|
if Args.output != sys.stdout:
|
||||||
|
Args.output.close()
|
||||||
|
2
setup.py
2
setup.py
@ -31,7 +31,7 @@ setup(
|
|||||||
"License :: OSI Approved :: BSD License",
|
"License :: OSI Approved :: BSD License",
|
||||||
"Operating System :: OS Independent",
|
"Operating System :: OS Independent",
|
||||||
"Programming Language :: Python",
|
"Programming Language :: Python",
|
||||||
#"Programming Language :: Python :: 2.6",
|
# Uses argparse and with statement; 2.7+
|
||||||
"Programming Language :: Python :: 2.7",
|
"Programming Language :: Python :: 2.7",
|
||||||
"Programming Language :: Python :: 3",
|
"Programming Language :: Python :: 3",
|
||||||
"Programming Language :: Python :: 3.1",
|
"Programming Language :: Python :: 3.1",
|
||||||
|
Loading…
Reference in New Issue
Block a user