1
0
mirror of https://github.com/kanzure/pdfparanoia.git synced 2024-12-04 23:15:52 +01:00

initial attempt at pairwise diff testing

This commit is contained in:
Scott Morrison 2013-05-02 20:06:18 +10:00
parent 6abfe2a380
commit 54b6ab070a
3 changed files with 34 additions and 0 deletions

3
.gitignore vendored
View File

@ -13,3 +13,6 @@ pdfparanoia.egg-info/
# ignore pdfs in the top-level dir
/*.pdf
# temporary pdfs in tests/diff/
tests/diff/*.pdf

23
tests/diff/comparediffs Executable file
View File

@ -0,0 +1,23 @@
#!/usr/bin/env bash
#
# Pairwise diff test for pdfparanoia.
#
# For every URL in the URL list, the PDF is downloaded twice, once through
# each of two SOCKS proxies. If the two copies differ, both are piped through
# pdfparanoia and the cleaned outputs are compared again.
#
# Usage: comparediffs [urls-file]        (urls-file defaults to ./urls)
#
# Environment:
#   PDFPARANOIA_PROXY_1   first SOCKS proxy  (default: localhost:1080)
#   PDFPARANOIA_PROXY_2   second SOCKS proxy (default: localhost:1083)
#
# TODO allow customizing the retrieval mechanisms beyond the proxy address
# TODO allow local caching
# TODO verify that we actually obtained pdfs

set -u

urls_file=${1:-urls}
proxy_1=${PDFPARANOIA_PROXY_1:-localhost:1080}
proxy_2=${PDFPARANOIA_PROXY_2:-localhost:1083}

if [[ ! -r "$urls_file" ]]; then
  printf 'cannot read url list: %s\n' "$urls_file" >&2
  exit 1
fi

# Keep all intermediate PDFs in a private directory that is removed on any
# exit path, so an interrupted run leaves nothing behind.
workdir=$(mktemp -d) || { printf 'mktemp -d failed\n' >&2; exit 1; }
trap 'rm -rf -- "$workdir"' EXIT

# The list is read on fd 3 so that commands inside the loop cannot consume it
# through their stdin. One URL per line; blank lines are skipped.
while IFS= read -r url <&3 || [[ -n "$url" ]]; do
  [[ -n "$url" ]] || continue

  echo "Retrieving $url"

  # --fail makes curl return non-zero on HTTP errors instead of saving the
  # error page as if it were the PDF.
  if ! curl --fail --socks5 "$proxy_1" "$url" > "$workdir/1.pdf" ||
     ! curl --fail --socks5 "$proxy_2" "$url" > "$workdir/2.pdf"; then
    printf 'download failed, skipping: %s\n' "$url" >&2
    continue
  fi

  # cmp -s is a silent byte-for-byte comparison; only the exit status is used.
  if cmp -s "$workdir/1.pdf" "$workdir/2.pdf"; then
    echo "PDFs are identical already, no need to use pdfparanoia"
  elif pdfparanoia < "$workdir/1.pdf" > "$workdir/1.cleaned.pdf" &&
       pdfparanoia < "$workdir/2.pdf" > "$workdir/2.cleaned.pdf" &&
       cmp -s "$workdir/1.cleaned.pdf" "$workdir/2.cleaned.pdf"; then
    echo "pdfparanoia successful scrubbed the PDFs"
  else
    # Reached when pdfparanoia itself exits non-zero or when the cleaned
    # outputs still differ.
    echo "pdfparanoia failed!"
  fi

  rm -f -- "$workdir"/*.pdf
done 3< "$urls_file"

8
tests/diff/urls Normal file
View File

@ -0,0 +1,8 @@
http://link.springer.com/content/pdf/10.1007/s00440-011-0397-9
http://msp.org/apde/2012/5-2/apde-v5-n2-p07-s.pdf
http://annals.math.princeton.edu/wp-content/uploads/annals-v176-n2-p11-s.pdf
http://www.worldscientific.com/doi/pdf/10.1142/S2010326311500018
http://www.sciencedirect.com/science?_ob=MiamiImageURL&_cid=272585&_user=994540&_pii=S0001870812001806&_check=y&_origin=article&_zone=toolbar&_coverDate=10-Sep-2012&view=c&originContentFamily=serial&wchp=dGLzVlt-zSkWb&md5=bfeb5e0619d45362640529aff02baeda&pid=1-s2.0-S0001870812001806-main.pdf
http://www.ams.org/journals/mcom/2012-81-278/S0025-5718-2011-02542-1/S0025-5718-2011-02542-1.pdf
http://www.ems-ph.org/journals/show_pdf.php?issn=1661-7207&vol=5&iss=2&rank=6
http://nyjm.albany.edu/j/2009/15-14p.pdf