mirror of
https://github.com/kanzure/pdfparanoia.git
synced 2024-12-04 23:15:52 +01:00
Merge pull request #38 from fmap/pdfminer-api
PDFMiner made breaking interface changes
This commit is contained in:
commit
5cc682e2c5
@@ -16,6 +16,7 @@ except ImportError: # py3k
|
||||
# from pdfquery import PDFQuery
|
||||
|
||||
import pdfminer.pdfparser
|
||||
+import pdfminer.pdfdocument
|
||||
|
||||
from .eraser import replace_object_with
|
||||
|
||||
@@ -28,9 +29,7 @@ def parse_pdf(handler):
|
||||
|
||||
# setup for parsing
|
||||
parser = pdfminer.pdfparser.PDFParser(handler)
|
||||
-doc = pdfminer.pdfparser.PDFDocument()
|
||||
-parser.set_document(doc)
|
||||
-doc.set_parser(parser)
|
||||
+doc = pdfminer.pdfdocument.PDFDocument(parser)
|
||||
|
||||
# actual parsing
|
||||
doc.initialize()
|
||||
@@ -58,8 +57,7 @@ def deflate(content):
|
||||
pdf = parse_content(content)
|
||||
|
||||
# get a list of all object ids
|
||||
-xrefs = pdf._parser.read_xref()
|
||||
-xref = xrefs[0]
|
||||
+xref = pdf.xrefs[0]
|
||||
objids = xref.get_objids()
|
||||
|
||||
# store new replacements
|
||||
|
@@ -25,8 +25,7 @@ class AmericanInstituteOfPhysics(Plugin):
|
||||
pdf = parse_content(content)
|
||||
|
||||
# get a list of all object ids
|
||||
-xrefs = pdf._parser.read_xref()
|
||||
-xref = xrefs[0]
|
||||
+xref = pdf.xrefs[0]
|
||||
objids = xref.get_objids()
|
||||
|
||||
# check each object in the pdf
|
||||
|
@@ -22,8 +22,7 @@ class IEEEXplore(Plugin):
|
||||
pdf = parse_content(content)
|
||||
|
||||
# get a list of all object ids
|
||||
-xrefs = pdf._parser.read_xref()
|
||||
-xref = xrefs[0]
|
||||
+xref = pdf.xrefs[0]
|
||||
objids = xref.get_objids()
|
||||
|
||||
# check each object in the pdf
|
||||
|
@@ -10,6 +10,8 @@ from ..eraser import (
|
||||
)
|
||||
from ..plugin import Plugin
|
||||
|
||||
+from pdfminer.pdftypes import PDFObjectNotFound
|
||||
|
||||
class JSTOR(Plugin):
|
||||
"""
|
||||
JSTOR
|
||||
@@ -44,13 +46,13 @@ class JSTOR(Plugin):
|
||||
pdf = parse_content(content)
|
||||
|
||||
# get a list of all object ids
|
||||
-xrefs = pdf._parser.read_xref()
|
||||
-xref = xrefs[0]
|
||||
+xref = pdf.xrefs[0]
|
||||
objids = xref.get_objids()
|
||||
|
||||
# check each object in the pdf
|
||||
for objid in objids:
|
||||
# get an object by id
|
||||
+try:
|
||||
obj = pdf.getobj(objid)
|
||||
|
||||
if hasattr(obj, "attrs"):
|
||||
@@ -93,6 +95,8 @@ class JSTOR(Plugin):
|
||||
replacements.append([objid, better_content])
|
||||
|
||||
page_id += 1
|
||||
+except PDFObjectNotFound, e:
|
||||
+print >>sys.stderr, 'Missing object: %r' % e
|
||||
|
||||
if verbose >= 1 and replacements:
|
||||
sys.stderr.write("%s: Found objects %s with %r; omitting..." % (cls.__name__, [deets[0] for deets in replacements], cls.requirements))
|
||||
|
@@ -42,8 +42,7 @@ class RoyalSocietyOfChemistry(Plugin):
|
||||
pdf = parse_content(content)
|
||||
|
||||
# get a list of all object ids
|
||||
-xrefs = pdf._parser.read_xref()
|
||||
-xref = xrefs[0]
|
||||
+xref = pdf.xrefs[0]
|
||||
objids = xref.get_objids()
|
||||
|
||||
# check each object in the pdf
|
||||
|
@@ -27,8 +27,7 @@ class ScienceMagazine(Plugin):
|
||||
pdf = parse_content(content)
|
||||
|
||||
# get a list of all object ids
|
||||
-xrefs = pdf._parser.read_xref()
|
||||
-xref = xrefs[0]
|
||||
+xref = pdf.xrefs[0]
|
||||
objids = xref.get_objids()
|
||||
|
||||
# check each object in the pdf
|
||||
|
@@ -1 +1 @@
|
||||
-pdfminer>=0
|
||||
+pdfminer>=20131113
|
||||
|
Loading…
Reference in New Issue
Block a user