Adapt to PDFMiner's breaking interface changes (#37).

This commit is contained in:
vi 2013-12-07 07:12:19 +08:00
parent 713776af67
commit 380bc289b3
6 changed files with 8 additions and 15 deletions

View File

@@ -16,6 +16,7 @@ except ImportError: # py3k
 # from pdfquery import PDFQuery
 import pdfminer.pdfparser
+import pdfminer.pdfdocument
 from .eraser import replace_object_with
@@ -28,9 +29,7 @@ def parse_pdf(handler):
 # setup for parsing
 parser = pdfminer.pdfparser.PDFParser(handler)
-doc = pdfminer.pdfparser.PDFDocument()
-parser.set_document(doc)
-doc.set_parser(parser)
+doc = pdfminer.pdfdocument.PDFDocument(parser)
 # actual parsing
 doc.initialize()
@@ -58,8 +57,7 @@ def deflate(content):
 pdf = parse_content(content)
 # get a list of all object ids
-xrefs = pdf._parser.read_xref()
-xref = xrefs[0]
+xref = pdf.xrefs[0]
 objids = xref.get_objids()
 # store new replacements

View File

@@ -25,8 +25,7 @@ class AmericanInstituteOfPhysics(Plugin):
 pdf = parse_content(content)
 # get a list of all object ids
-xrefs = pdf._parser.read_xref()
-xref = xrefs[0]
+xref = pdf.xrefs[0]
 objids = xref.get_objids()
 # check each object in the pdf

View File

@@ -22,8 +22,7 @@ class IEEEXplore(Plugin):
 pdf = parse_content(content)
 # get a list of all object ids
-xrefs = pdf._parser.read_xref()
-xref = xrefs[0]
+xref = pdf.xrefs[0]
 objids = xref.get_objids()
 # check each object in the pdf

View File

@@ -44,8 +44,7 @@ class JSTOR(Plugin):
 pdf = parse_content(content)
 # get a list of all object ids
-xrefs = pdf._parser.read_xref()
-xref = xrefs[0]
+xref = pdf.xrefs[0]
 objids = xref.get_objids()
 # check each object in the pdf

View File

@@ -42,8 +42,7 @@ class RoyalSocietyOfChemistry(Plugin):
 pdf = parse_content(content)
 # get a list of all object ids
-xrefs = pdf._parser.read_xref()
-xref = xrefs[0]
+xref = pdf.xrefs[0]
 objids = xref.get_objids()
 # check each object in the pdf

View File

@@ -27,8 +27,7 @@ class ScienceMagazine(Plugin):
 pdf = parse_content(content)
 # get a list of all object ids
-xrefs = pdf._parser.read_xref()
-xref = xrefs[0]
+xref = pdf.xrefs[0]
 objids = xref.get_objids()
 # check each object in the pdf