From d0b44c380f6bb98aba0c57ee32910d4d6f71a948 Mon Sep 17 00:00:00 2001 From: tamo Date: Wed, 14 Apr 2021 12:09:51 +0200 Subject: [PATCH] add benchmarks on a wiki dataset --- milli/Cargo.toml | 4 ++ milli/benches/wiki.rs | 127 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 milli/benches/wiki.rs diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 2bdb3f4dc..1c0f74613 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -63,3 +63,7 @@ default = [] [[bench]] name = "songs" harness = false + +[[bench]] +name = "wiki" +harness = false diff --git a/milli/benches/wiki.rs b/milli/benches/wiki.rs new file mode 100644 index 000000000..fc8af02e5 --- /dev/null +++ b/milli/benches/wiki.rs @@ -0,0 +1,127 @@ +mod utils; + +use criterion::{criterion_group, criterion_main}; +use milli::update::Settings; +use utils::Conf; + +fn base_conf(builder: &mut Settings) { + let displayed_fields = ["title", "body", "url"] + .iter() + .map(|s| s.to_string()) + .collect(); + builder.set_displayed_fields(displayed_fields); + + let searchable_fields = ["title", "body"].iter().map(|s| s.to_string()).collect(); + builder.set_searchable_fields(searchable_fields); +} + +const BASE_CONF: Conf = Conf { + dataset: "smol-wiki-articles.csv", + queries: &[ + "mingus ", // 46 candidates + "miles davis ", // 159 + "rock and roll ", // 1007 + "machine ", // 3448 + "spain ", // 7002 + "japan ", // 10.593 + "france ", // 17.616 + "film ", // 24.959 + ], + configure: base_conf, + ..Conf::BASE +}; + +fn bench_songs(c: &mut criterion::Criterion) { + let basic_with_quote: Vec = BASE_CONF + .queries + .iter() + .map(|s| { + s.trim() + .split(' ') + .map(|s| format!(r#""{}""#, s)).collect::>().join(" ") + }) + .collect(); + let basic_with_quote: &[&str] = &basic_with_quote.iter().map(|s| s.as_str()).collect::>(); + + let confs = &[ + /* first we bench each criterion alone */ + utils::Conf { + group_name: "proximity", + queries: &[ + "herald sings ", + "april paris ", + "tea two ", + "diesel engine ", + ], + criterion: Some(&["proximity"]), + optional_words: false, + ..BASE_CONF + }, + utils::Conf { + group_name: "typo", + queries: &[ + "migrosoft ", + "linax ", + "Disnaylande ", + "phytogropher ", + "nympalidea ", + "aritmetric ", + "the fronce ", + "sisan ", + ], + criterion: Some(&["typo"]), + optional_words: false, + ..BASE_CONF + }, + utils::Conf { + group_name: "words", + queries: &[ + "the black saint and the sinner lady and the good doggo ", // four words to pop, 27 results + "Kameya Tokujirō mingus monk ", // two words to pop, 55 + "Ulrich Hensel meilisearch milli ", // two words to pop, 306 + "Idaho Bellevue pizza ", // one word to pop, 800 + "Abraham machin ", // one word to pop, 1141 + ], + criterion: Some(&["words"]), + ..BASE_CONF + }, + /* the we bench some global / normal search with all the default criterion in the default + * order */ + utils::Conf { + group_name: "basic placeholder", + queries: &[""], + ..BASE_CONF + }, + utils::Conf { + group_name: "basic without quote", + queries: &BASE_CONF + .queries + .iter() + .map(|s| s.trim()) // we remove the space at the end of each request + .collect::>(), + ..BASE_CONF + }, + utils::Conf { + group_name: "basic with quote", + queries: basic_with_quote, + ..BASE_CONF + }, + utils::Conf { + group_name: "prefix search", + queries: &[ + "t", // 453k results + "c", // 405k + "g", // 318k + "j", // 227k + "q", // 71k + "x", // 17k + ], + ..BASE_CONF + }, + ]; + + utils::run_benches(c, confs); +} + +criterion_group!(benches, bench_songs); +criterion_main!(benches);