From 54b6ab070ae13fe2fc09b0313b0e3f7243b843d6 Mon Sep 17 00:00:00 2001
From: Scott Morrison
Date: Thu, 2 May 2013 20:06:18 +1000
Subject: [PATCH] initial attempt at pairwise diff testing

---
Note: tests/diff/comparediffs was revised after this patch was generated, so
the blob id on its "index" line no longer matches the content below.

 .gitignore              |  3 +++
 tests/diff/comparediffs | 43 +++++++++++++++++++++++++++++++++++++++++++
 tests/diff/urls         |  8 ++++++++
 3 files changed, 54 insertions(+)
 create mode 100755 tests/diff/comparediffs
 create mode 100644 tests/diff/urls

diff --git a/.gitignore b/.gitignore
index 4278ff0..66b8e49 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,6 @@ pdfparanoia.egg-info/
 
 # ignore pdfs in the top-level dir
 /*.pdf
+
+# temporary pdfs in tests/diff/
+tests/diff/*.pdf
diff --git a/tests/diff/comparediffs b/tests/diff/comparediffs
new file mode 100755
index 0000000..f49bd51
--- /dev/null
+++ b/tests/diff/comparediffs
@@ -0,0 +1,43 @@
+#!/bin/sh
+# Pairwise diff test: fetch every URL listed in ./urls through two different
+# SOCKS proxies and check that pdfparanoia makes the two downloads identical.
+# Run from tests/diff/: the urls file and the temporary PDFs are relative to
+# the current directory. Exits non-zero if any URL could not be retrieved or
+# could not be scrubbed to identical output.
+
+status=0
+
+# Remove the temporary PDFs when the script exits.
+trap 'rm -f 1.pdf 2.pdf 1.cleaned.pdf 2.cleaned.pdf' EXIT
+
+# Read line by line so that URLs are never word-split or glob-expanded.
+while IFS= read -r url; do
+    [ -n "$url" ] || continue
+    echo "Retrieving $url"
+    # TODO allow customizing the proxies, or more generally the retrieval mechanisms
+    # TODO allow local caching
+    # --fail makes curl report HTTP errors instead of saving the error page;
+    # --socks5 is the documented spelling of the SOCKS proxy option.
+    if ! curl --fail --socks5 localhost:1080 "$url" > 1.pdf ||
+       ! curl --fail --socks5 localhost:1083 "$url" > 2.pdf; then
+        echo "Retrieval failed, skipping $url" >&2
+        status=1
+        continue
+    fi
+    # TODO verify that we actually obtained pdfs
+    # cmp -s compares bytes silently; diff would report on the binary content.
+    if cmp -s 1.pdf 2.pdf; then
+        echo "PDFs are identical already, no need to use pdfparanoia"
+    else
+        pdfparanoia < 1.pdf > 1.cleaned.pdf
+        pdfparanoia < 2.pdf > 2.cleaned.pdf
+        if cmp -s 1.cleaned.pdf 2.cleaned.pdf; then
+            echo "pdfparanoia successfully scrubbed the PDFs"
+        else
+            echo "pdfparanoia failed!"
+            status=1
+        fi
+    fi
+done < urls
+
+exit "$status"
diff --git a/tests/diff/urls b/tests/diff/urls
new file mode 100644
index 0000000..0900fbc
--- /dev/null
+++ b/tests/diff/urls
@@ -0,0 +1,8 @@
+http://link.springer.com/content/pdf/10.1007/s00440-011-0397-9
+http://msp.org/apde/2012/5-2/apde-v5-n2-p07-s.pdf
+http://annals.math.princeton.edu/wp-content/uploads/annals-v176-n2-p11-s.pdf
+http://www.worldscientific.com/doi/pdf/10.1142/S2010326311500018
+http://www.sciencedirect.com/science?_ob=MiamiImageURL&_cid=272585&_user=994540&_pii=S0001870812001806&_check=y&_origin=article&_zone=toolbar&_coverDate=10-Sep-2012&view=c&originContentFamily=serial&wchp=dGLzVlt-zSkWb&md5=bfeb5e0619d45362640529aff02baeda&pid=1-s2.0-S0001870812001806-main.pdf
+http://www.ams.org/journals/mcom/2012-81-278/S0025-5718-2011-02542-1/S0025-5718-2011-02542-1.pdf
+http://www.ems-ph.org/journals/show_pdf.php?issn=1661-7207&vol=5&iss=2&rank=6
+http://nyjm.albany.edu/j/2009/15-14p.pdf