pdfparanoia/tests/diff/comparediffs

69 lines
2.5 KiB
Plaintext
Executable File

# Argument check: the script needs exactly two positional arguments, the
# addresses of the two SOCKS proxies. With any other argument count it prints
# a short usage hint on stdout and exits with status 1.
case "$#" in
  2) ;;
  *)
    printf '%s\n' \
      "Please supply the addresses of two SOCKS proxies:" \
      "compare host1:port1 host2:port2"
    exit 1
    ;;
esac
# The two SOCKS proxy addresses given on the command line; the rest of the
# script refers to them as $proxy1 and $proxy2.
proxy1="$1"
proxy2="$2"

# Probe one SOCKS proxy and abort the script if it does not work.
#   $1 - proxy address, passed to curl as given
#   $2 - exit status to terminate the script with on failure
# The probe fetches http://google.com through the proxy and looks for the
# text "The document has moved" in the response body. The result of the
# pipeline is grep's exit status, so any curl failure simply shows up as
# "text not found".
# NOTE(review): `--socks` is kept exactly as the script had it; current curl
# documentation lists --socks4/--socks4a/--socks5/--socks5-hostname, so
# confirm the installed curl accepts this spelling.
require_working_proxy() {
  if ! curl -s --socks "$1" http://google.com \
    | grep "The document has moved" > /dev/null; then
    echo "SOCKS proxy $1 doesn't seem to be working."
    exit "$2"
  fi
}

# Distinct exit codes tell the caller which proxy failed: 2 = first, 3 = second.
require_working_proxy "$proxy1" 2
require_working_proxy "$proxy2" 3
# Session banner: tells the user which two proxies are being compared, that
# URLs are expected on stdin (one per line), and where PDFs that still differ
# after cleaning will be left. A single here-document replaces a run of
# separate echo calls; $proxy1 and $proxy2 are expanded inside it.
cat <<EOF
--------------------------------------------------------------------------------------------------------------------------------
Comparing PDFs downloaded via '$proxy1' and '$proxy2'
Please enter URLs, one per line.

PDFs which still differ after pdfparanoia will be left in the pdf/ subdirectory, with suffixes .1.cleaned.pdf and .2.cleaned.pdf
--------------------------------------------------------------------------------------------------------------------------------

EOF

# Working directory for downloads and cleaned copies (no error if it exists).
mkdir -p pdf
# ---------------------------------------------------------------------------
# Main loop: read URLs from stdin (one per line), download each one through
# both SOCKS proxies ($proxy1 and $proxy2) and check whether pdfparanoia makes
# the two copies byte-identical.
#
# All files live in pdf/ and are named after the SHA-1 of the URL:
#   pdf/<hash>.1.pdf          download via $proxy1
#   pdf/<hash>.2.pdf          download via $proxy2
#   pdf/<hash>.N.cleaned.pdf  pdfparanoia output for download N
# An existing non-empty file is reused instead of being produced again.
# ---------------------------------------------------------------------------

# Print the SHA-1 hex digest of "$1" plus a trailing newline. The newline is
# hashed on purpose: it matches what `echo URL | openssl sha1` digests, so
# the cache file names stay the same. cut keeps the second space-separated
# field of openssl's output (or the whole line if it contains no space).
url_hash() {
  printf '%s\n' "$1" | openssl sha1 | cut -d ' ' -f 2
}

# Succeed if file "$1" starts with the four bytes "%PDF".
# Uses the POSIX `=` operator so the test also works when run by plain sh.
is_pdf() {
  [ "$(head -c 4 "$1")" = "%PDF" ]
}

# Download URL "$2" through SOCKS proxy "$1" into file "$3".
# Output goes through a shell redirection, so "$3" is created (possibly
# empty) even when curl fails; is_pdf then rejects it.
# NOTE(review): `--socks` is kept exactly as the script had it; current curl
# documentation lists --socks4/--socks4a/--socks5/--socks5-hostname, so
# confirm the installed curl accepts this spelling.
fetch_via() {
  curl --socks "$1" -L --cookie cookie.jar \
    -A "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)" \
    -H "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" \
    "$2" > "$3"
}

# Run pdfparanoia on file "$1" and write the result to "$2", unless "$2"
# already exists and is non-empty.
scrub_cached() {
  if [ ! -s "$2" ]; then
    pdfparanoia < "$1" > "$2"
  fi
}

# Process a single URL ("$1"): fetch both copies, compare them, and if they
# differ compare them again after pdfparanoia. Progress is reported on stdout.
# Reads $proxy1/$proxy2; sets the globals hash, raw1, raw2, clean1, clean2.
compare_url() {
  hash=$(url_hash "$1")
  printf 'Retrieving %s, with hash %s\n' "$1" "$hash"

  raw1="pdf/$hash.1.pdf"
  raw2="pdf/$hash.2.pdf"
  clean1="pdf/$hash.1.cleaned.pdf"
  clean2="pdf/$hash.2.cleaned.pdf"

  if [ ! -s "$raw1" ]; then
    fetch_via "$proxy1" "$1" "$raw1"
  fi
  if ! is_pdf "$raw1"; then
    echo "Download failed from source 1"
    # Drop everything cached for this URL so a later run starts fresh.
    rm pdf/"$hash".*.pdf
    return
  fi

  if [ ! -s "$raw2" ]; then
    fetch_via "$proxy2" "$1" "$raw2"
  fi
  if ! is_pdf "$raw2"; then
    echo "Download failed from source 2"
    # Removes the first download as well, not just the failed second one.
    rm pdf/"$hash".*.pdf
    return
  fi

  if cmp -s "$raw1" "$raw2"; then
    echo "PDFs are identical already, no need to use pdfparanoia"
    return
  fi

  echo "PDFs differ, running pdfparanoia"
  scrub_cached "$raw1" "$clean1"
  scrub_cached "$raw2" "$clean2"
  if cmp -s "$clean1" "$clean2"; then
    echo "pdfparanoia successful scrubbed the PDFs"
    # Cleaned copies are only kept when they still differ.
    rm pdf/"$hash".*.cleaned.pdf
  else
    echo "pdfparanoia failed!"
  fi
}

# -r keeps backslashes literal; the `|| [ -n "$url" ]` part still processes a
# final line that lacks a trailing newline. Leading and trailing whitespace is
# stripped by read (default IFS).
while read -r url || [ -n "$url" ]; do
  # Nothing to fetch for an empty line.
  [ -n "$url" ] || continue
  compare_url "$url"
  echo
done