mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-24 04:07:30 +01:00
Merge #453
453: Benchmark multi batch indexing r=Kerollmops a=Kerollmops Hey `@irevoire,` could you please add the new benchmarks into influx? Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
commit
382be56d36
@ -83,6 +83,77 @@ fn indexing_songs_default(c: &mut Criterion) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||||
|
let mut group = c.benchmark_group("indexing");
|
||||||
|
group.sample_size(10);
|
||||||
|
group.bench_function("Indexing songs in three batches with default settings", |b| {
|
||||||
|
b.iter_with_setup(
|
||||||
|
move || {
|
||||||
|
let index = setup_index();
|
||||||
|
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
|
|
||||||
|
builder.set_primary_key("id".to_owned());
|
||||||
|
let displayed_fields =
|
||||||
|
["title", "album", "artist", "genre", "country", "released", "duration"]
|
||||||
|
.iter()
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.collect();
|
||||||
|
builder.set_displayed_fields(displayed_fields);
|
||||||
|
|
||||||
|
let searchable_fields =
|
||||||
|
["title", "album", "artist"].iter().map(|s| s.to_string()).collect();
|
||||||
|
builder.set_searchable_fields(searchable_fields);
|
||||||
|
|
||||||
|
let faceted_fields =
|
||||||
|
["released-timestamp", "duration-float", "genre", "country", "artist"]
|
||||||
|
.iter()
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.collect();
|
||||||
|
builder.set_filterable_fields(faceted_fields);
|
||||||
|
builder.execute(|_| ()).unwrap();
|
||||||
|
|
||||||
|
// We index only one half of the dataset in the setup part
|
||||||
|
// as we don't care about the time it takes.
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_1_2, "csv");
|
||||||
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
index
|
||||||
|
},
|
||||||
|
move |index| {
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_3_4, "csv");
|
||||||
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_4_4, "csv");
|
||||||
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
index.prepare_for_closing().wait();
|
||||||
|
},
|
||||||
|
)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
||||||
let mut group = c.benchmark_group("indexing");
|
let mut group = c.benchmark_group("indexing");
|
||||||
group.sample_size(10);
|
group.sample_size(10);
|
||||||
@ -223,6 +294,76 @@ fn indexing_wiki(c: &mut Criterion) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||||
|
let mut group = c.benchmark_group("indexing");
|
||||||
|
group.sample_size(10);
|
||||||
|
group.bench_function("Indexing wiki in three batches", |b| {
|
||||||
|
b.iter_with_setup(
|
||||||
|
move || {
|
||||||
|
let index = setup_index();
|
||||||
|
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
|
|
||||||
|
builder.set_primary_key("id".to_owned());
|
||||||
|
let displayed_fields =
|
||||||
|
["title", "body", "url"].iter().map(|s| s.to_string()).collect();
|
||||||
|
builder.set_displayed_fields(displayed_fields);
|
||||||
|
|
||||||
|
let searchable_fields = ["title", "body"].iter().map(|s| s.to_string()).collect();
|
||||||
|
builder.set_searchable_fields(searchable_fields);
|
||||||
|
|
||||||
|
// there is NO faceted fields at all
|
||||||
|
builder.execute(|_| ()).unwrap();
|
||||||
|
|
||||||
|
// We index only one half of the dataset in the setup part
|
||||||
|
// as we don't care about the time it takes.
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config =
|
||||||
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
let documents =
|
||||||
|
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_1_2, "csv");
|
||||||
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
index
|
||||||
|
},
|
||||||
|
move |index| {
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config =
|
||||||
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
|
||||||
|
let documents =
|
||||||
|
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_3_4, "csv");
|
||||||
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
|
let indexing_config =
|
||||||
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
|
||||||
|
let documents =
|
||||||
|
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_4_4, "csv");
|
||||||
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
index.prepare_for_closing().wait();
|
||||||
|
},
|
||||||
|
)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
fn indexing_movies_default(c: &mut Criterion) {
|
fn indexing_movies_default(c: &mut Criterion) {
|
||||||
let mut group = c.benchmark_group("indexing");
|
let mut group = c.benchmark_group("indexing");
|
||||||
group.sample_size(10);
|
group.sample_size(10);
|
||||||
@ -272,6 +413,77 @@ fn indexing_movies_default(c: &mut Criterion) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||||
|
let mut group = c.benchmark_group("indexing");
|
||||||
|
group.sample_size(10);
|
||||||
|
group.bench_function("Indexing movies in three batches", |b| {
|
||||||
|
b.iter_with_setup(
|
||||||
|
move || {
|
||||||
|
let index = setup_index();
|
||||||
|
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
|
|
||||||
|
builder.set_primary_key("id".to_owned());
|
||||||
|
let displayed_fields = ["title", "poster", "overview", "release_date", "genres"]
|
||||||
|
.iter()
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.collect();
|
||||||
|
builder.set_displayed_fields(displayed_fields);
|
||||||
|
|
||||||
|
let searchable_fields =
|
||||||
|
["title", "overview"].iter().map(|s| s.to_string()).collect();
|
||||||
|
builder.set_searchable_fields(searchable_fields);
|
||||||
|
|
||||||
|
let faceted_fields =
|
||||||
|
["released_date", "genres"].iter().map(|s| s.to_string()).collect();
|
||||||
|
builder.set_filterable_fields(faceted_fields);
|
||||||
|
|
||||||
|
builder.execute(|_| ()).unwrap();
|
||||||
|
|
||||||
|
// We index only one half of the dataset in the setup part
|
||||||
|
// as we don't care about the time it takes.
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
|
||||||
|
let documents = utils::documents_from(datasets_paths::MOVIES_1_2, "json");
|
||||||
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
index
|
||||||
|
},
|
||||||
|
move |index| {
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
|
||||||
|
let documents = utils::documents_from(datasets_paths::MOVIES_3_4, "json");
|
||||||
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
|
||||||
|
let documents = utils::documents_from(datasets_paths::MOVIES_4_4, "json");
|
||||||
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
index.prepare_for_closing().wait();
|
||||||
|
},
|
||||||
|
)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
fn indexing_geo(c: &mut Criterion) {
|
fn indexing_geo(c: &mut Criterion) {
|
||||||
let mut group = c.benchmark_group("indexing");
|
let mut group = c.benchmark_group("indexing");
|
||||||
group.sample_size(10);
|
group.sample_size(10);
|
||||||
@ -332,8 +544,11 @@ criterion_group!(
|
|||||||
indexing_songs_default,
|
indexing_songs_default,
|
||||||
indexing_songs_without_faceted_numbers,
|
indexing_songs_without_faceted_numbers,
|
||||||
indexing_songs_without_faceted_fields,
|
indexing_songs_without_faceted_fields,
|
||||||
|
indexing_songs_in_three_batches_default,
|
||||||
indexing_wiki,
|
indexing_wiki,
|
||||||
|
indexing_wiki_in_three_batches,
|
||||||
indexing_movies_default,
|
indexing_movies_default,
|
||||||
|
indexing_movies_in_three_batches,
|
||||||
indexing_geo
|
indexing_geo
|
||||||
);
|
);
|
||||||
criterion_main!(benches);
|
criterion_main!(benches);
|
||||||
|
@ -11,10 +11,35 @@ use reqwest::IntoUrl;
|
|||||||
const BASE_URL: &str = "https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets";
|
const BASE_URL: &str = "https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets";
|
||||||
|
|
||||||
const DATASET_SONGS: (&str, &str) = ("smol-songs", "csv");
|
const DATASET_SONGS: (&str, &str) = ("smol-songs", "csv");
|
||||||
|
const DATASET_SONGS_1_2: (&str, &str) = ("smol-songs-1_2", "csv");
|
||||||
|
const DATASET_SONGS_3_4: (&str, &str) = ("smol-songs-3_4", "csv");
|
||||||
|
const DATASET_SONGS_4_4: (&str, &str) = ("smol-songs-4_4", "csv");
|
||||||
const DATASET_WIKI: (&str, &str) = ("smol-wiki-articles", "csv");
|
const DATASET_WIKI: (&str, &str) = ("smol-wiki-articles", "csv");
|
||||||
|
const DATASET_WIKI_1_2: (&str, &str) = ("smol-wiki-articles-1_2", "csv");
|
||||||
|
const DATASET_WIKI_3_4: (&str, &str) = ("smol-wiki-articles-3_4", "csv");
|
||||||
|
const DATASET_WIKI_4_4: (&str, &str) = ("smol-wiki-articles-4_4", "csv");
|
||||||
const DATASET_MOVIES: (&str, &str) = ("movies", "json");
|
const DATASET_MOVIES: (&str, &str) = ("movies", "json");
|
||||||
|
const DATASET_MOVIES_1_2: (&str, &str) = ("movies-1_2", "json");
|
||||||
|
const DATASET_MOVIES_3_4: (&str, &str) = ("movies-3_4", "json");
|
||||||
|
const DATASET_MOVIES_4_4: (&str, &str) = ("movies-4_4", "json");
|
||||||
const DATASET_GEO: (&str, &str) = ("smol-all-countries", "jsonl");
|
const DATASET_GEO: (&str, &str) = ("smol-all-countries", "jsonl");
|
||||||
|
|
||||||
|
const ALL_DATASETS: &[(&str, &str)] = &[
|
||||||
|
DATASET_SONGS,
|
||||||
|
DATASET_SONGS_1_2,
|
||||||
|
DATASET_SONGS_3_4,
|
||||||
|
DATASET_SONGS_4_4,
|
||||||
|
DATASET_WIKI,
|
||||||
|
DATASET_WIKI_1_2,
|
||||||
|
DATASET_WIKI_3_4,
|
||||||
|
DATASET_WIKI_4_4,
|
||||||
|
DATASET_MOVIES,
|
||||||
|
DATASET_MOVIES_1_2,
|
||||||
|
DATASET_MOVIES_3_4,
|
||||||
|
DATASET_MOVIES_4_4,
|
||||||
|
DATASET_GEO,
|
||||||
|
];
|
||||||
|
|
||||||
/// The name of the environment variable used to select the path
|
/// The name of the environment variable used to select the path
|
||||||
/// of the directory containing the datasets
|
/// of the directory containing the datasets
|
||||||
const BASE_DATASETS_PATH_KEY: &str = "MILLI_BENCH_DATASETS_PATH";
|
const BASE_DATASETS_PATH_KEY: &str = "MILLI_BENCH_DATASETS_PATH";
|
||||||
@ -33,7 +58,7 @@ fn main() -> anyhow::Result<()> {
|
|||||||
)?;
|
)?;
|
||||||
writeln!(manifest_paths_file)?;
|
writeln!(manifest_paths_file)?;
|
||||||
|
|
||||||
for (dataset, extension) in [DATASET_SONGS, DATASET_WIKI, DATASET_MOVIES, DATASET_GEO] {
|
for (dataset, extension) in ALL_DATASETS {
|
||||||
let out_path = out_dir.join(dataset);
|
let out_path = out_dir.join(dataset);
|
||||||
let out_file = out_path.with_extension(extension);
|
let out_file = out_path.with_extension(extension);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user