1
0
mirror of https://github.com/kanzure/pdfparanoia.git synced 2024-06-10 20:59:52 +02:00
pdfparanoia/pdfparanoia/eraser.py

34 lines
901 B
Python
Raw Normal View History

2013-02-05 10:10:14 +01:00
# -*- coding: utf-8 -*-
"""
pdfparanoia.eraser
~~~~~~~~~~~~~~~~~~
Tools to erase things from pdfs by direct manipulation of the pdf format.
"""
def remove_object_by_id(content, objid):
    """
    Deletes an object from a pdf. Mostly streams and FlateDecode stuff.

    The pdf is scanned line by line. An object header is only recognised
    when it is the very first line or directly follows an ``endobj`` line,
    and when the line either ends with ``obj`` or contains `` obj<<`` within
    its first 50 characters. If such a header starts with ``"<objid> "``,
    that line and every following line up to and including the next
    ``endobj`` line are dropped. All other lines are kept unchanged.

    Trailing whitespace (including the ``\\r`` left behind by CRLF line
    endings) is ignored when recognising the header and ``endobj`` lines.

    :param content: the pdf source as a string, split on ``"\\n"``.
    :param objid: number of the object to delete; converted with ``str()``.
    :returns: the pdf source with the matching object(s) removed.
    """
    # The trailing space stops objid 1 from matching "12 0 obj".
    header_prefix = str(objid) + " "

    outlines = []
    last_line = None
    skip_mode = False

    for line in content.split("\n"):
        # Compare against a right-stripped copy so that "endobj\r" or
        # "endobj " still terminate an object; the original line is what
        # gets written to the output.
        stripped = line.rstrip()

        if skip_mode:
            # Inside the object being removed: drop lines until endobj.
            if stripped == "endobj":
                skip_mode = False
            last_line = stripped
            continue

        if (
            last_line in ("endobj", None)
            and (stripped.endswith("obj") or " obj<<" in line[0:50])
            and line.startswith(header_prefix)
        ):
            # Header of the target object: start dropping lines.
            skip_mode = True
            last_line = stripped
            continue

        outlines.append(line)
        last_line = stripped

    return "\n".join(outlines)