mirror of
https://github.com/kanzure/pdfparanoia.git
synced 2024-05-30 07:38:03 +02:00
56cc7719da
Also cleaned up some flakes noticed by pyflakes, and made scrub() a @classmethod instead of a @staticmethod so that the class can be used for the verbose output. Caveats: * there are no unit tests for this patch * your stderr logs may now contain potentially sensitive information * the implementation of argument parsing is very low-tech (a *good* way to do argument parsing is the "argparse" module)
57 lines
1.2 KiB
Python
57 lines
1.2 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
pdfparanoia.core
|
|
~~~~~~~~~~~~~~~
|
|
|
|
This module provides most of the heavy lifting of pdfparanoia.
|
|
|
|
"""
|
|
|
|
import sys
|
|
import inspect
|
|
|
|
from .parser import (
|
|
parse_pdf,
|
|
parse_content,
|
|
)
|
|
|
|
from .plugin import Plugin
|
|
|
|
from pdfparanoia.plugins import *
|
|
|
|
def find_plugins():
    """
    Collects every plugin class visible in this module's namespace.

    A member counts as a plugin when it is a class, derives from Plugin,
    and is not the Plugin base class itself. The classes are returned as a
    list, in the order inspect.getmembers yields them.
    """
    def is_concrete_plugin(candidate):
        # Non-classes must be rejected first: issubclass() raises TypeError
        # when its first argument is not a class.
        if not inspect.isclass(candidate):
            return False
        return issubclass(candidate, Plugin) and candidate is not Plugin

    this_module = sys.modules[__name__]
    matches = inspect.getmembers(this_module, is_concrete_plugin)
    # getmembers yields (name, value) pairs; only the classes are wanted.
    return [plugin_class for _name, plugin_class in matches]
|
|
|
|
def scrub(obj, verbose=False):
    """
    Removes watermarks from a pdf and returns the resulting pdf content.

    :param obj: either an already-open file-like object (anything exposing
        ``seek``) or a path that can be handed to ``open``.
    :param verbose: passed through unchanged to each plugin's ``scrub``.
    :returns: the raw content after every plugin returned by
        ``find_plugins`` has processed it in turn.
    """
    if hasattr(obj, "seek"):
        # Caller-owned handle: rewind so the whole document is read, and
        # leave it open because the caller is responsible for closing it.
        obj.seek(0)
        content = obj.read()
    else:
        # We opened this file ourselves, so make sure it is closed again
        # even if the read fails.
        with open(obj, "rb") as handle:
            content = handle.read()

    # clean this pdf as much as possible: each plugin receives the output
    # of the previous one
    for plugin in find_plugins():
        content = plugin.scrub(content, verbose=verbose)

    return content
|
|
|