mirror of
https://github.com/kanzure/pdfparanoia.git
synced 2024-12-04 23:15:52 +01:00
support pdf formats with whitespace line endings
JSTOR PDFs have trailing whitespace at the end of each line. Though their watermarks are not yet removable, this change makes it possible to parse their files in the future, as well as files from any other publisher that does something similar. See #1.
This commit is contained in:
parent
bc89bc5335
commit
8eb8797eeb
@ -18,14 +18,14 @@ def remove_object_by_id(content, objid):
|
||||
for line in lines:
|
||||
if not skip_mode:
|
||||
if last_line in ["endobj", None]:
|
||||
if line[-3:] == "obj" or " obj<<" in line[0:50]:
|
||||
if line[-3:] == "obj" or line[-4:] == "obj " or " obj<<" in line[0:50]:
|
||||
if line.startswith(str(objid) + " "):
|
||||
skip_mode = True
|
||||
last_line = line
|
||||
continue
|
||||
outlines.append(line)
|
||||
elif skip_mode:
|
||||
if line == "endobj":
|
||||
if line == "endobj" or line == "endobj ":
|
||||
skip_mode = False
|
||||
last_line = line
|
||||
output = "\n".join(outlines)
|
||||
|
Loading…
Reference in New Issue
Block a user