mod datasets_paths; use std::fs::{create_dir_all, remove_dir_all, File}; use std::path::Path; use criterion::{criterion_group, criterion_main, Criterion}; use heed::EnvOpenOptions; use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; use milli::Index; #[cfg(target_os = "linux")] #[global_allocator] static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; fn setup_dir(path: impl AsRef) { match remove_dir_all(path.as_ref()) { Ok(_) => (), Err(e) if e.kind() == std::io::ErrorKind::NotFound => (), Err(e) => panic!("{}", e), } create_dir_all(path).unwrap(); } fn setup_index() -> Index { let path = "benches.mmdb"; setup_dir(&path); let mut options = EnvOpenOptions::new(); options.map_size(100 * 1024 * 1024 * 1024); // 100 GB options.max_readers(10); Index::new(options, path).unwrap() } fn indexing_songs_default(c: &mut Criterion) { let index = setup_index(); let update_builder = UpdateBuilder::new(0); let mut wtxn = index.write_txn().unwrap(); let mut builder = update_builder.settings(&mut wtxn, &index); builder.set_primary_key("id".to_owned()); let displayed_fields = ["title", "album", "artist", "genre", "country", "released", "duration"] .iter() .map(|s| s.to_string()) .collect(); builder.set_displayed_fields(displayed_fields); let searchable_fields = ["title", "album", "artist"].iter().map(|s| s.to_string()).collect(); builder.set_searchable_fields(searchable_fields); let faceted_fields = ["released-timestamp", "duration-float", "genre", "country", "artist"] .iter() .map(|s| s.to_string()) .collect(); builder.set_filterable_fields(faceted_fields); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); let index_ref = &index; let mut group = c.benchmark_group("indexing"); group.sample_size(10); group.bench_function("Indexing songs with default settings", |b| { b.iter_with_setup( move || { let update_builder = UpdateBuilder::new(0); let mut wtxn = index_ref.write_txn().unwrap(); let builder = update_builder.delete_documents(&mut wtxn, index_ref).unwrap(); builder.execute().unwrap(); wtxn.commit().unwrap(); () }, move |_| { let update_builder = UpdateBuilder::new(0); let mut wtxn = index_ref.write_txn().unwrap(); let mut builder = update_builder.index_documents(&mut wtxn, index_ref); builder.update_format(UpdateFormat::Csv); builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments); let reader = File::open(datasets_paths::SMOL_SONGS).expect(&format!( "could not find the dataset in: {}", datasets_paths::SMOL_SONGS )); builder.execute(reader, |_, _| ()).unwrap(); wtxn.commit().unwrap(); }, ) }); index.prepare_for_closing().wait(); } fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { let index = setup_index(); let update_builder = UpdateBuilder::new(0); let mut wtxn = index.write_txn().unwrap(); let mut builder = update_builder.settings(&mut wtxn, &index); builder.set_primary_key("id".to_owned()); let displayed_fields = ["title", "album", "artist", "genre", "country", "released", "duration"] .iter() .map(|s| s.to_string()) .collect(); builder.set_displayed_fields(displayed_fields); let searchable_fields = ["title", "album", "artist"].iter().map(|s| s.to_string()).collect(); builder.set_searchable_fields(searchable_fields); let faceted_fields = ["genre", "country", "artist"].iter().map(|s| s.to_string()).collect(); builder.set_filterable_fields(faceted_fields); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); let index_ref = &index; let mut group = c.benchmark_group("indexing"); group.sample_size(10); group.bench_function("Indexing songs without faceted numbers", |b| { b.iter_with_setup( move || { let update_builder = UpdateBuilder::new(0); let mut wtxn = index_ref.write_txn().unwrap(); let builder = update_builder.delete_documents(&mut wtxn, index_ref).unwrap(); builder.execute().unwrap(); wtxn.commit().unwrap(); () }, move |_| { let update_builder = UpdateBuilder::new(0); let mut wtxn = index_ref.write_txn().unwrap(); let mut builder = update_builder.index_documents(&mut wtxn, index_ref); builder.update_format(UpdateFormat::Csv); builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments); let reader = File::open(datasets_paths::SMOL_SONGS).expect(&format!( "could not find the dataset in: {}", datasets_paths::SMOL_SONGS )); builder.execute(reader, |_, _| ()).unwrap(); wtxn.commit().unwrap(); }, ) }); index.prepare_for_closing().wait(); } fn indexing_songs_without_faceted_fields(c: &mut Criterion) { let index = setup_index(); let update_builder = UpdateBuilder::new(0); let mut wtxn = index.write_txn().unwrap(); let mut builder = update_builder.settings(&mut wtxn, &index); builder.set_primary_key("id".to_owned()); let displayed_fields = ["title", "album", "artist", "genre", "country", "released", "duration"] .iter() .map(|s| s.to_string()) .collect(); builder.set_displayed_fields(displayed_fields); let searchable_fields = ["title", "album", "artist"].iter().map(|s| s.to_string()).collect(); builder.set_searchable_fields(searchable_fields); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); let index_ref = &index; let mut group = c.benchmark_group("indexing"); group.sample_size(10); group.bench_function("Indexing songs without any facets", |b| { b.iter_with_setup( move || { let update_builder = UpdateBuilder::new(0); let mut wtxn = index_ref.write_txn().unwrap(); let builder = update_builder.delete_documents(&mut wtxn, index_ref).unwrap(); builder.execute().unwrap(); wtxn.commit().unwrap(); () }, move |_| { let update_builder = UpdateBuilder::new(0); let mut wtxn = index_ref.write_txn().unwrap(); let mut builder = update_builder.index_documents(&mut wtxn, index_ref); builder.update_format(UpdateFormat::Csv); builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments); let reader = File::open(datasets_paths::SMOL_SONGS).expect(&format!( "could not find the dataset in: {}", datasets_paths::SMOL_SONGS )); builder.execute(reader, |_, _| ()).unwrap(); wtxn.commit().unwrap(); }, ) }); index.prepare_for_closing().wait(); } fn indexing_wiki(c: &mut Criterion) { let index = setup_index(); let update_builder = UpdateBuilder::new(0); let mut wtxn = index.write_txn().unwrap(); let mut builder = update_builder.settings(&mut wtxn, &index); builder.set_primary_key("id".to_owned()); let displayed_fields = ["title", "body", "url"].iter().map(|s| s.to_string()).collect(); builder.set_displayed_fields(displayed_fields); let searchable_fields = ["title", "body"].iter().map(|s| s.to_string()).collect(); builder.set_searchable_fields(searchable_fields); // there is NO faceted fields at all builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); let index_ref = &index; let mut group = c.benchmark_group("indexing"); group.sample_size(10); group.bench_function("Indexing wiki", |b| { b.iter_with_setup( move || { let update_builder = UpdateBuilder::new(0); let mut wtxn = index_ref.write_txn().unwrap(); let builder = update_builder.delete_documents(&mut wtxn, index_ref).unwrap(); builder.execute().unwrap(); wtxn.commit().unwrap(); () }, move |_| { let update_builder = UpdateBuilder::new(0); let mut wtxn = index_ref.write_txn().unwrap(); let mut builder = update_builder.index_documents(&mut wtxn, index_ref); builder.update_format(UpdateFormat::Csv); builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments); let reader = File::open(datasets_paths::SMOL_WIKI_ARTICLES).expect(&format!( "could not find the dataset in: {}", datasets_paths::SMOL_SONGS )); builder.execute(reader, |_, _| ()).unwrap(); wtxn.commit().unwrap(); }, ) }); index.prepare_for_closing().wait(); } fn indexing_movies_default(c: &mut Criterion) { let index = setup_index(); let update_builder = UpdateBuilder::new(0); let mut wtxn = index.write_txn().unwrap(); let mut builder = update_builder.settings(&mut wtxn, &index); builder.set_primary_key("id".to_owned()); let displayed_fields = ["title", "poster", "overview", "release_date", "genres"] .iter() .map(|s| s.to_string()) .collect(); builder.set_displayed_fields(displayed_fields); let searchable_fields = ["title", "overview"].iter().map(|s| s.to_string()).collect(); builder.set_searchable_fields(searchable_fields); let faceted_fields = ["released_date", "genres"].iter().map(|s| s.to_string()).collect(); builder.set_filterable_fields(faceted_fields); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); let index_ref = &index; let mut group = c.benchmark_group("indexing"); group.sample_size(10); group.bench_function("Indexing movies with default settings", |b| { b.iter_with_setup( move || { let update_builder = UpdateBuilder::new(0); let mut wtxn = index_ref.write_txn().unwrap(); let builder = update_builder.delete_documents(&mut wtxn, index_ref).unwrap(); builder.execute().unwrap(); wtxn.commit().unwrap(); () }, move |_| { let update_builder = UpdateBuilder::new(0); let mut wtxn = index_ref.write_txn().unwrap(); let mut builder = update_builder.index_documents(&mut wtxn, index_ref); builder.update_format(UpdateFormat::Json); builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments); let reader = File::open(datasets_paths::MOVIES) .expect(&format!("could not find the dataset in: {}", datasets_paths::MOVIES)); builder.execute(reader, |_, _| ()).unwrap(); wtxn.commit().unwrap(); }, ) }); index.prepare_for_closing().wait(); } criterion_group!( benches, indexing_songs_default, indexing_songs_without_faceted_numbers, indexing_songs_without_faceted_fields, indexing_wiki, indexing_movies_default ); criterion_main!(benches);