Add a new movies benchmark to test multi-batch indexing

This commit is contained in:
Clément Renault 2022-02-22 13:47:37 +01:00 committed by Kerollmops
parent 8d2e3e4aba
commit a820aa11e6
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
2 changed files with 78 additions and 0 deletions

View File

@ -413,6 +413,77 @@ fn indexing_movies_default(c: &mut Criterion) {
}); });
} }
/// Benchmarks indexing the movies dataset when it is sent in three separate batches.
///
/// The setup closure creates the index, applies the settings and indexes the first
/// batch (`MOVIES_1_2`); the original author's intent is that this part is not what
/// the benchmark is about. The benchmarked routine then indexes the two remaining
/// batches (`MOVIES_3_4`, then `MOVIES_4_4`) inside a single write transaction,
/// commits it and waits on `prepare_for_closing()`.
fn indexing_movies_in_three_batches(c: &mut Criterion) {
    let mut group = c.benchmark_group("indexing");
    group.sample_size(10);
    group.bench_function("Indexing movies in three batches", |b| {
        b.iter_with_setup(
            move || {
                let index = setup_index();

                let config = IndexerConfig::default();
                let mut wtxn = index.write_txn().unwrap();
                let mut builder = Settings::new(&mut wtxn, &index, &config);

                builder.set_primary_key("id".to_owned());
                let displayed_fields = ["title", "poster", "overview", "release_date", "genres"]
                    .iter()
                    .map(|s| s.to_string())
                    .collect();
                builder.set_displayed_fields(displayed_fields);

                let searchable_fields =
                    ["title", "overview"].iter().map(|s| s.to_string()).collect();
                builder.set_searchable_fields(searchable_fields);

                // The field is spelled `release_date`, exactly as in the displayed
                // fields above; the previous `released_date` spelling did not match it,
                // so the filterable attribute pointed at a different field name.
                let faceted_fields =
                    ["release_date", "genres"].iter().map(|s| s.to_string()).collect();
                builder.set_filterable_fields(faceted_fields);
                builder.execute(|_| ()).unwrap();

                // We index only the first half of the dataset in the setup part
                // as we don't care about the time it takes.
                let indexing_config = IndexDocumentsConfig::default();
                let mut builder =
                    IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
                let documents = utils::documents_from(datasets_paths::MOVIES_1_2, "json");
                builder.add_documents(documents).unwrap();
                builder.execute().unwrap();

                wtxn.commit().unwrap();
                index
            },
            move |index| {
                let config = IndexerConfig::default();
                let mut wtxn = index.write_txn().unwrap();

                // Each remaining batch gets its own `IndexDocuments` run, in order,
                // but both runs share the same write transaction.
                for &dataset in &[datasets_paths::MOVIES_3_4, datasets_paths::MOVIES_4_4] {
                    let indexing_config = IndexDocumentsConfig::default();
                    let mut builder =
                        IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
                    let documents = utils::documents_from(dataset, "json");
                    builder.add_documents(documents).unwrap();
                    builder.execute().unwrap();
                }

                wtxn.commit().unwrap();

                index.prepare_for_closing().wait();
            },
        )
    });
}
fn indexing_geo(c: &mut Criterion) { fn indexing_geo(c: &mut Criterion) {
let mut group = c.benchmark_group("indexing"); let mut group = c.benchmark_group("indexing");
group.sample_size(10); group.sample_size(10);
@ -477,6 +548,7 @@ criterion_group!(
indexing_wiki, indexing_wiki,
indexing_wiki_in_three_batches, indexing_wiki_in_three_batches,
indexing_movies_default, indexing_movies_default,
indexing_movies_in_three_batches,
indexing_geo indexing_geo
); );
criterion_main!(benches); criterion_main!(benches);

View File

@ -19,6 +19,9 @@ const DATASET_WIKI_1_2: (&str, &str) = ("smol-wiki-articles-1_2", "csv");
const DATASET_WIKI_3_4: (&str, &str) = ("smol-wiki-articles-3_4", "csv"); const DATASET_WIKI_3_4: (&str, &str) = ("smol-wiki-articles-3_4", "csv");
const DATASET_WIKI_4_4: (&str, &str) = ("smol-wiki-articles-4_4", "csv"); const DATASET_WIKI_4_4: (&str, &str) = ("smol-wiki-articles-4_4", "csv");
const DATASET_MOVIES: (&str, &str) = ("movies", "json"); const DATASET_MOVIES: (&str, &str) = ("movies", "json");
// Partial movies datasets, declared in the same two-string form as the other
// dataset entries in this file.
// NOTE(review): presumably (file name, file format), covering the first half and
// the third and fourth quarters of `movies` — confirm against the hosted files.
const DATASET_MOVIES_1_2: (&str, &str) = ("movies-1_2", "json");
const DATASET_MOVIES_3_4: (&str, &str) = ("movies-3_4", "json");
const DATASET_MOVIES_4_4: (&str, &str) = ("movies-4_4", "json");
const DATASET_GEO: (&str, &str) = ("smol-all-countries", "jsonl"); const DATASET_GEO: (&str, &str) = ("smol-all-countries", "jsonl");
const ALL_DATASETS: &[(&str, &str)] = &[ const ALL_DATASETS: &[(&str, &str)] = &[
@ -31,6 +34,9 @@ const ALL_DATASETS: &[(&str, &str)] = &[
DATASET_WIKI_3_4, DATASET_WIKI_3_4,
DATASET_WIKI_4_4, DATASET_WIKI_4_4,
DATASET_MOVIES, DATASET_MOVIES,
DATASET_MOVIES_1_2,
DATASET_MOVIES_3_4,
DATASET_MOVIES_4_4,
DATASET_GEO, DATASET_GEO,
]; ];