1
0
mirror of https://github.com/kanzure/pdfparanoia.git synced 2024-06-14 06:39:51 +02:00
pdfparanoia/pdfparanoia/eraser.py

34 lines
945 B
Python
Raw Normal View History

2013-02-05 10:10:14 +01:00
# -*- coding: utf-8 -*-
"""
pdfparanoia.eraser
~~~~~~~~~~~~~~~
Tools to erase things from pdfs by direct manipulation of the pdf format.
"""
def remove_object_by_id(content, objid):
    """
    Deletes an object from a pdf. Mostly streams and FlateDecode stuff.

    The pdf text is scanned line by line (split on "\\n"). A line is treated
    as the header of the object to delete when all of the following hold:

    * it is the first line, or the previous line is an "endobj" terminator
      (with or without one trailing space);
    * it ends with "obj" or "obj ", or contains " obj<<" within its first
      50 characters;
    * it starts with ``str(objid)`` followed by a space.

    The header line and every following line up to and including the next
    "endobj" terminator are dropped. If no terminator follows, everything
    through the end of the content is dropped.

    :param content: the pdf contents as a single string.
    :param objid: number of the object to remove (anything ``str()`` can
        render as that number). Only the object number is matched; the
        generation number is not checked.
    :returns: the pdf contents with the matching object's lines removed,
        re-joined with "\\n".
    """
    # Both spellings end an object. The same set must be used when looking
    # back at the previous line, otherwise an object that follows an
    # "endobj " (trailing space) terminator would never be recognised.
    end_markers = ("endobj", "endobj ")
    header_prefix = str(objid) + " "

    kept = []
    previous = None
    skipping = False
    for line in content.split("\n"):
        if skipping:
            # Drop everything inside the object, including its terminator.
            if line in end_markers:
                skipping = False
        elif (
            (previous is None or previous in end_markers)
            and (line.endswith(("obj", "obj ")) or " obj<<" in line[:50])
            and line.startswith(header_prefix)
        ):
            # Header of the target object: drop it and start skipping.
            skipping = True
        else:
            kept.append(line)
        previous = line
    return "\n".join(kept)