mirror of
https://github.com/kanzure/pdfparanoia.git
synced 2025-02-11 13:13:10 +01:00
39 lines
1.7 KiB
Python
Executable File
39 lines
1.7 KiB
Python
Executable File
#!/usr/bin/env python2.7
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
pdfparanoia - pdf watermark removal tool
|
|
|
|
This is the command-line client. It accepts pdf formatted data either through
|
|
stdin/piping or by referencing a file as the first positional argument (argv[1]).
|
|
"""
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: read a PDF from a file or stdin, scrub its
    # watermarks with pdfparanoia, and write the result to a file or stdout.
    import sys
    import pdfparanoia
    import argparse
    from StringIO import StringIO

    parser = argparse.ArgumentParser(description="pdfparanoia is a PDF watermark removal library for academic papers. Some publishers include private information like institution names, personal names, ip addresses, timestamps and other identifying information in watermarks on each page.")
    # Optional positional input; argparse.FileType interprets "-" as stdin.
    parser.add_argument('in_pdf', nargs='?', type=argparse.FileType('rb'),
                        default='-')
    parser.add_argument("-o", "--output", type=argparse.FileType('wb'),
                        default=sys.stdout)
    parser.add_argument("-v", "--verbose", action="store_true", default=False,
                        help="Output more information, which may be sensitive or excessive.")
    parser.add_argument("-V", "--more-verbose", action="store_true", default=False,
                        help="Output even more information. Implies -v.")
    args = parser.parse_args()

    # Map the two flags onto a single level: 0 (quiet), 1 (-v), 2 (-V).
    # -V wins over -v because it implies it.
    if args.more_verbose:
        verbose = 2
    elif args.verbose:
        verbose = 1
    else:
        verbose = 0

    try:
        # The whole input is read into memory and wrapped in StringIO before
        # being handed to pdfparanoia; this is what lets stdin input work.
        try:
            raw_pdf = args.in_pdf.read()
        finally:
            # Release the input handle even if the read fails.
            args.in_pdf.close()

        scrubbed = pdfparanoia.scrub(StringIO(raw_pdf), verbose=verbose)
        args.output.write(scrubbed)
    finally:
        # Close the output even when scrubbing or writing raises, but never
        # close the interpreter's own stdout.
        if args.output is not sys.stdout:
            args.output.close()
|