mirror of
https://github.com/kanzure/pdfparanoia.git
synced 2024-12-04 23:15:52 +01:00
Adapt to PDFMiner's breaking interface changes (#37).
This commit is contained in:
parent
713776af67
commit
380bc289b3
@ -16,6 +16,7 @@ except ImportError: # py3k
|
|||||||
# from pdfquery import PDFQuery
|
# from pdfquery import PDFQuery
|
||||||
|
|
||||||
import pdfminer.pdfparser
|
import pdfminer.pdfparser
|
||||||
|
import pdfminer.pdfdocument
|
||||||
|
|
||||||
from .eraser import replace_object_with
|
from .eraser import replace_object_with
|
||||||
|
|
||||||
@ -28,9 +29,7 @@ def parse_pdf(handler):
|
|||||||
|
|
||||||
# setup for parsing
|
# setup for parsing
|
||||||
parser = pdfminer.pdfparser.PDFParser(handler)
|
parser = pdfminer.pdfparser.PDFParser(handler)
|
||||||
doc = pdfminer.pdfparser.PDFDocument()
|
doc = pdfminer.pdfdocument.PDFDocument(parser)
|
||||||
parser.set_document(doc)
|
|
||||||
doc.set_parser(parser)
|
|
||||||
|
|
||||||
# actual parsing
|
# actual parsing
|
||||||
doc.initialize()
|
doc.initialize()
|
||||||
@ -58,8 +57,7 @@ def deflate(content):
|
|||||||
pdf = parse_content(content)
|
pdf = parse_content(content)
|
||||||
|
|
||||||
# get a list of all object ids
|
# get a list of all object ids
|
||||||
xrefs = pdf._parser.read_xref()
|
xref = pdf.xrefs[0]
|
||||||
xref = xrefs[0]
|
|
||||||
objids = xref.get_objids()
|
objids = xref.get_objids()
|
||||||
|
|
||||||
# store new replacements
|
# store new replacements
|
||||||
|
@ -25,8 +25,7 @@ class AmericanInstituteOfPhysics(Plugin):
|
|||||||
pdf = parse_content(content)
|
pdf = parse_content(content)
|
||||||
|
|
||||||
# get a list of all object ids
|
# get a list of all object ids
|
||||||
xrefs = pdf._parser.read_xref()
|
xref = pdf.xrefs[0]
|
||||||
xref = xrefs[0]
|
|
||||||
objids = xref.get_objids()
|
objids = xref.get_objids()
|
||||||
|
|
||||||
# check each object in the pdf
|
# check each object in the pdf
|
||||||
|
@ -22,8 +22,7 @@ class IEEEXplore(Plugin):
|
|||||||
pdf = parse_content(content)
|
pdf = parse_content(content)
|
||||||
|
|
||||||
# get a list of all object ids
|
# get a list of all object ids
|
||||||
xrefs = pdf._parser.read_xref()
|
xref = pdf.xrefs[0]
|
||||||
xref = xrefs[0]
|
|
||||||
objids = xref.get_objids()
|
objids = xref.get_objids()
|
||||||
|
|
||||||
# check each object in the pdf
|
# check each object in the pdf
|
||||||
|
@ -44,8 +44,7 @@ class JSTOR(Plugin):
|
|||||||
pdf = parse_content(content)
|
pdf = parse_content(content)
|
||||||
|
|
||||||
# get a list of all object ids
|
# get a list of all object ids
|
||||||
xrefs = pdf._parser.read_xref()
|
xref = pdf.xrefs[0]
|
||||||
xref = xrefs[0]
|
|
||||||
objids = xref.get_objids()
|
objids = xref.get_objids()
|
||||||
|
|
||||||
# check each object in the pdf
|
# check each object in the pdf
|
||||||
|
@ -42,8 +42,7 @@ class RoyalSocietyOfChemistry(Plugin):
|
|||||||
pdf = parse_content(content)
|
pdf = parse_content(content)
|
||||||
|
|
||||||
# get a list of all object ids
|
# get a list of all object ids
|
||||||
xrefs = pdf._parser.read_xref()
|
xref = pdf.xrefs[0]
|
||||||
xref = xrefs[0]
|
|
||||||
objids = xref.get_objids()
|
objids = xref.get_objids()
|
||||||
|
|
||||||
# check each object in the pdf
|
# check each object in the pdf
|
||||||
|
@ -27,8 +27,7 @@ class ScienceMagazine(Plugin):
|
|||||||
pdf = parse_content(content)
|
pdf = parse_content(content)
|
||||||
|
|
||||||
# get a list of all object ids
|
# get a list of all object ids
|
||||||
xrefs = pdf._parser.read_xref()
|
xref = pdf.xrefs[0]
|
||||||
xref = xrefs[0]
|
|
||||||
objids = xref.get_objids()
|
objids = xref.get_objids()
|
||||||
|
|
||||||
# check each object in the pdf
|
# check each object in the pdf
|
||||||
|
Loading…
Reference in New Issue
Block a user