1
0
mirror of https://github.com/kanzure/pdfparanoia.git synced 2024-05-29 07:08:03 +02:00
pdfparanoia/pdfparanoia/core.py
Zooko O'Whielacronx 56cc7719da add a "--verbose" option that writes to stderr if it finds anything to omit
Also cleaned up some flakes noticed by pyflakes, and made scrub() a @classmethod instead of a @staticmethod so I could use the class for the verbose output.

caveats:

* there are no unit tests of this patch
* now your logs of your stderr have potentially sensitive information in them
* the implementation of arg parsing is very low-tech (a *good* way to do arg parsing is the "argparse" module)
2013-02-13 19:58:47 +00:00

57 lines
1.2 KiB
Python

# -*- coding: utf-8 -*-
"""
pdfparanoia.core
~~~~~~~~~~~~~~~
This module provides most of the heavy lifting of pdfparanoia.
"""
import sys
import inspect
from .parser import (
parse_pdf,
parse_content,
)
from .plugin import Plugin
from pdfparanoia.plugins import *
def find_plugins():
    """
    Returns a list of all compatible plugins.

    A compatible plugin is any class reachable as a member of this module
    that subclasses Plugin, excluding the Plugin base class itself. The
    list follows the name-sorted order produced by inspect.getmembers.
    """
    def is_plugin(candidate):
        # Order matters: issubclass() is only valid once we know the
        # candidate is actually a class.
        if not inspect.isclass(candidate):
            return False
        if not issubclass(candidate, Plugin):
            return False
        # The abstract base itself is not a usable plugin.
        return candidate is not Plugin

    this_module = sys.modules[__name__]
    return [
        member
        for _name, member in inspect.getmembers(this_module, is_plugin)
    ]
def scrub(obj, verbose=False):
    """
    Removes watermarks from a pdf and returns the resulting pdf as a string.

    obj is either a file-like object (anything exposing "seek") or a value
    accepted by open(), such as a path. File-like objects are rewound to the
    start before reading and are left open for the caller to manage; a file
    opened here from a path is closed as soon as it has been read.

    verbose is forwarded unchanged to every plugin's scrub() call.
    """
    if hasattr(obj, "seek"):
        # reset the file handler so the whole document is read
        obj.seek(0)
        content = obj.read()
    else:
        # We own this handle, so make sure it is closed even if read() fails.
        with open(obj, "rb") as handle:
            content = handle.read()

    # clean this pdf as much as possible: each plugin receives the output
    # of the previous one
    for plugin in find_plugins():
        content = plugin.scrub(content, verbose=verbose)

    return content