support PDF files whose lines end with trailing whitespace

JSTOR PDFs have trailing whitespace at the end of each line. Though
their watermarks are not yet removable, this change supports parsing
their files in the future, as well as files from any other publisher
that formats its PDFs the same way.

see #1
This commit is contained in:
Bryan Bishop 2013-02-05 19:07:28 -06:00
parent bc89bc5335
commit 8eb8797eeb
1 changed files with 2 additions and 2 deletions

View File

@ -18,14 +18,14 @@ def remove_object_by_id(content, objid):
for line in lines:
if not skip_mode:
if last_line in ["endobj", None]:
if line[-3:] == "obj" or " obj<<" in line[0:50]:
if line[-3:] == "obj" or line[-4:] == "obj " or " obj<<" in line[0:50]:
if line.startswith(str(objid) + " "):
skip_mode = True
last_line = line
continue
outlines.append(line)
elif skip_mode:
if line == "endobj":
if line == "endobj" or line == "endobj ":
skip_mode = False
last_line = line
output = "\n".join(outlines)