mirror of
https://github.com/kanzure/pdfparanoia.git
synced 2025-02-11 21:23:10 +01:00
24 lines
676 B
Plaintext
Executable File
24 lines
676 B
Plaintext
Executable File
#!/usr/bin/env bash
#
# Check whether pdfparanoia removes the differences between two downloads of
# the same PDF.
#
# Every URL in the URL list is downloaded twice, once through each of two
# SOCKS proxies. If the two downloads are byte-identical nothing more is done.
# Otherwise both copies are piped through pdfparanoia and the cleaned results
# are compared again.
#
# Usage:   <this script> [url-file]
#   url-file  whitespace-separated list of URLs (default: ./urls)
#
# Needs curl, cmp, mktemp and pdfparanoia on PATH.
#
# Exit status: 0 when every URL was retrieved and its two copies were
# identical (before or after scrubbing); 1 otherwise.
#
# Deliberately no `set -e`: a failure on one URL must not stop the remaining
# URLs from being checked, so every critical command is checked explicitly.

set -u

# TODO allow customizing the proxies, or more generally the retrieval mechanisms
readonly PROXY_ONE='localhost:1080'
readonly PROXY_TWO='localhost:1083'

# Private scratch directory for the downloads; empty while none exists.
scratch_dir=''

# Remove the scratch directory, if any. Safe to call more than once.
cleanup() {
  if [[ -n "$scratch_dir" ]]; then
    rm -rf -- "$scratch_dir"
    scratch_dir=''
  fi
}

#######################################
# Download one URL through one proxy.
# Arguments: $1 - URL, $2 - proxy as host:port, $3 - destination file
# Returns:   curl's exit status
#######################################
fetch() {
  # --fail turns HTTP error responses into a curl failure instead of saving
  # the error page as if it were the PDF. --url keeps a URL that starts with
  # "-" from being parsed as an option.
  # NOTE(review): --socks5 is curl's documented SOCKS5 option; the older
  # `--socks` spelling is believed to be a legacy alias of it - confirm
  # against the installed curl.
  curl --fail --socks5 "$2" --output "$3" --url "$1"
}

#######################################
# Fetch one URL twice and report whether the copies match, scrubbing them
# with pdfparanoia when they do not.
# Arguments: $1 - URL, $2 - existing directory for the intermediate files
# Outputs:   progress and verdict on stdout, diagnostics on stderr
# Returns:   0 if the copies are identical before or after scrubbing,
#            1 on download failure, pdfparanoia failure, or mismatch
#######################################
check_url() {
  local url=$1
  local dir=$2
  local first="$dir/1.pdf"
  local second="$dir/2.pdf"
  local first_clean="$dir/1.cleaned.pdf"
  local second_clean="$dir/2.cleaned.pdf"

  printf 'Retrieving %s\n' "$url"

  # TODO allow local caching
  if ! fetch "$url" "$PROXY_ONE" "$first" \
    || ! fetch "$url" "$PROXY_TWO" "$second"; then
    printf 'failed to retrieve %s\n' "$url" >&2
    return 1
  fi

  # TODO verify that we actually obtained pdfs

  # cmp -s is a silent byte-for-byte comparison; diff would dump output for
  # files that differ.
  if cmp -s "$first" "$second"; then
    echo "PDFs are identical already, no need to use pdfparanoia"
    return 0
  fi

  if ! pdfparanoia < "$first" > "$first_clean" \
    || ! pdfparanoia < "$second" > "$second_clean"; then
    printf 'pdfparanoia exited with an error for %s\n' "$url" >&2
    return 1
  fi

  if cmp -s "$first_clean" "$second_clean"; then
    echo "pdfparanoia successful scrubbed the PDFs"
    return 0
  fi

  echo "pdfparanoia failed!"
  return 1
}

#######################################
# Entry point: check every URL listed in the URL file.
# Globals:   scratch_dir (written)
# Arguments: $1 - optional path of the URL list (default: urls)
# Returns:   0 if every URL checked out, 1 otherwise
#######################################
main() {
  local url_file=${1:-urls}
  local -a urls=()
  local url
  local status=0

  if [[ ! -f "$url_file" || ! -r "$url_file" ]]; then
    printf 'cannot read URL list: %s\n' "$url_file" >&2
    return 1
  fi

  # read -d '' consumes the whole file and -a splits it on whitespace: one
  # URL per word, but without the pathname expansion an unquoted command
  # substitution would apply to "?" and "*" inside URLs. read reports
  # failure at end-of-file even though the array has been filled.
  IFS=$' \t\n' read -r -d '' -a urls < "$url_file" || true

  if (( ${#urls[@]} == 0 )); then
    return 0
  fi

  # A private directory instead of fixed names in the current directory:
  # nothing of the caller's gets overwritten, and the EXIT trap removes the
  # files even when the run is interrupted.
  if ! scratch_dir=$(mktemp -d); then
    printf 'mktemp failed\n' >&2
    return 1
  fi
  trap cleanup EXIT

  for url in "${urls[@]}"; do
    check_url "$url" "$scratch_dir" || status=1
  done

  cleanup
  return "$status"
}

main "$@"