From db514ff744dcdf1c908e6f7df934178225df8448 Mon Sep 17 00:00:00 2001 From: Cathal Garvey Date: Thu, 21 Mar 2013 23:49:03 +0000 Subject: [PATCH] Fixed a few bugs so reading from stdin now works. Involves a potentially costly recast of file contents as StringIO. --- bin/pdfparanoia | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/pdfparanoia b/bin/pdfparanoia index 744eeda..389ee03 100755 --- a/bin/pdfparanoia +++ b/bin/pdfparanoia @@ -16,7 +16,7 @@ if __name__ == "__main__": ArgP = argparse.ArgumentParser(description="pdfparanoia is a PDF watermark removal library for academic papers. Some publishers include private information like institution names, personal names, ip addresses, timestamps and other identifying information in watermarks on each page.") ArgP.add_argument('in_pdf', nargs='?', type=argparse.FileType('rb'), - default=sys.stdin) + default='-') # argparse.FileType interprets "-" as stdin. ArgP.add_argument("-o", "--output", type=argparse.FileType('wb'), default=sys.stdout) ArgP.add_argument("-v", "--verbose", action="store_true", default=False, @@ -29,7 +29,9 @@ if __name__ == "__main__": if Args.verbose: verbose = 1 if Args.more_verbose: verbose = 2 - outputcontent = pdfparanoia.scrub(Args.in_pdf, verbose=verbose) + # I really don't like having to read a file only to cast as StringIO, but it seems + # necessary to get reading from stdin to play nicely with pdfparanoia. + outputcontent = pdfparanoia.scrub(StringIO(Args.in_pdf.read()), verbose=verbose) Args.in_pdf.close() Args.output.write(outputcontent) if Args.output != sys.stdout: