mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-22 19:27:27 +01:00
Add deletion benchmarks
This commit is contained in:
parent
447195a27a
commit
adbb0ff318
@ -16,6 +16,9 @@ jemallocator = "0.3.2"
|
||||
[dev-dependencies]
|
||||
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1" }
|
||||
criterion = { version = "0.3.5", features = ["html_reports"] }
|
||||
rand = "0.8.5"
|
||||
rand_chacha = "0.3.1"
|
||||
roaring = "0.9.0"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.56"
|
||||
|
@ -5,14 +5,21 @@ use std::fs::{create_dir_all, remove_dir_all};
|
||||
use std::path::Path;
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||
use heed::{EnvOpenOptions, RwTxn};
|
||||
use milli::update::{
|
||||
DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings,
|
||||
};
|
||||
use milli::Index;
|
||||
use rand::seq::SliceRandom;
|
||||
use rand_chacha::rand_core::SeedableRng;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[global_allocator]
|
||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||
|
||||
/// Sample size passed to `group.sample_size(..)` by the benchmarks in this file.
const BENCHMARK_ITERATION: usize = 10;
|
||||
|
||||
fn setup_dir(path: impl AsRef<Path>) {
|
||||
match remove_dir_all(path.as_ref()) {
|
||||
Ok(_) => (),
|
||||
@ -31,39 +38,95 @@ fn setup_index() -> Index {
|
||||
Index::new(options, path).unwrap()
|
||||
}
|
||||
|
||||
fn setup_settings<'t>(
|
||||
wtxn: &mut RwTxn<'t, '_>,
|
||||
index: &'t Index,
|
||||
primary_key: &str,
|
||||
searchable_fields: &[&str],
|
||||
filterable_fields: &[&str],
|
||||
sortable_fields: &[&str],
|
||||
) {
|
||||
let config = IndexerConfig::default();
|
||||
let mut builder = Settings::new(wtxn, index, &config);
|
||||
|
||||
builder.set_primary_key(primary_key.to_owned());
|
||||
|
||||
let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect();
|
||||
builder.set_filterable_fields(filterable_fields);
|
||||
|
||||
let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
|
||||
builder.set_sortable_fields(sortable_fields);
|
||||
|
||||
builder.execute(|_| ()).unwrap();
|
||||
}
|
||||
|
||||
fn setup_index_with_settings<'t>(
|
||||
primary_key: &str,
|
||||
searchable_fields: &[&str],
|
||||
filterable_fields: &[&str],
|
||||
sortable_fields: &[&str],
|
||||
) -> milli::Index {
|
||||
let index = setup_index();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
setup_settings(
|
||||
&mut wtxn,
|
||||
&index,
|
||||
primary_key,
|
||||
searchable_fields,
|
||||
filterable_fields,
|
||||
sortable_fields,
|
||||
);
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index
|
||||
}
|
||||
|
||||
fn choose_document_ids_from_index_batched(
|
||||
index: &Index,
|
||||
count: usize,
|
||||
batch_size: usize,
|
||||
) -> Vec<RoaringBitmap> {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
// create batch of document ids to delete
|
||||
let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(7700);
|
||||
let document_ids: Vec<_> = index.documents_ids(&rtxn).unwrap().into_iter().collect();
|
||||
let document_ids_to_delete: Vec<_> =
|
||||
document_ids.choose_multiple(&mut rng, count).map(Clone::clone).collect();
|
||||
|
||||
document_ids_to_delete
|
||||
.chunks(batch_size)
|
||||
.map(|c| {
|
||||
let mut batch = RoaringBitmap::new();
|
||||
for id in c {
|
||||
batch.insert(*id);
|
||||
}
|
||||
|
||||
batch
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn indexing_songs_default(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(10);
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Indexing songs with default settings", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let index = setup_index();
|
||||
let primary_key = "id";
|
||||
let searchable_fields = ["title", "album", "artist"];
|
||||
let filterable_fields =
|
||||
["released-timestamp", "duration-float", "genre", "country", "artist"];
|
||||
let sortable_fields = [];
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
builder.set_primary_key("id".to_owned());
|
||||
let displayed_fields =
|
||||
["title", "album", "artist", "genre", "country", "released", "duration"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields =
|
||||
["title", "album", "artist"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let faceted_fields =
|
||||
["released-timestamp", "duration-float", "genre", "country", "artist"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_filterable_fields(faceted_fields);
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
index
|
||||
setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
)
|
||||
},
|
||||
move |index| {
|
||||
let config = IndexerConfig::default();
|
||||
@ -84,41 +147,85 @@ fn indexing_songs_default(c: &mut Criterion) {
|
||||
});
|
||||
}
|
||||
|
||||
fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
fn deleting_songs_in_batches_default(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(10);
|
||||
group.bench_function("Indexing songs in three batches with default settings", |b| {
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Deleting songs in batches with default settings", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let index = setup_index();
|
||||
let primary_key = "id";
|
||||
let searchable_fields = ["title", "album", "artist"];
|
||||
let filterable_fields =
|
||||
["released-timestamp", "duration-float", "genre", "country", "artist"];
|
||||
let sortable_fields = [];
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
builder.set_primary_key("id".to_owned());
|
||||
let displayed_fields =
|
||||
["title", "album", "artist", "genre", "country", "released", "duration"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields =
|
||||
["title", "album", "artist"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let faceted_fields =
|
||||
["released-timestamp", "duration-float", "genre", "country", "artist"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_filterable_fields(faceted_fields);
|
||||
builder.execute(|_| ()).unwrap();
|
||||
let index = setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
);
|
||||
|
||||
// We index only one half of the dataset in the setup part
|
||||
// as we don't care about the time it takes.
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
let count = 1250;
|
||||
let batch_size = 250;
|
||||
let document_ids_to_delete =
|
||||
choose_document_ids_from_index_batched(&index, count, batch_size);
|
||||
|
||||
(index, document_ids_to_delete)
|
||||
},
|
||||
move |(index, document_ids_to_delete)| {
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
for ids in document_ids_to_delete {
|
||||
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
builder.delete_documents(&ids);
|
||||
builder.execute().unwrap();
|
||||
}
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
},
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Indexing songs in three batches with default settings", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let primary_key = "id";
|
||||
let searchable_fields = ["title", "album", "artist"];
|
||||
let filterable_fields =
|
||||
["released-timestamp", "duration-float", "genre", "country", "artist"];
|
||||
let sortable_fields = [];
|
||||
|
||||
let index = setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
);
|
||||
|
||||
// We index only one half of the dataset in the setup part
|
||||
// as we don't care about the time it takes.
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
@ -160,34 +267,21 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
|
||||
fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(10);
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Indexing songs without faceted numbers", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let index = setup_index();
|
||||
let primary_key = "id";
|
||||
let searchable_fields = ["title", "album", "artist"];
|
||||
let filterable_fields = ["genre", "country", "artist"];
|
||||
let sortable_fields = [];
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
builder.set_primary_key("id".to_owned());
|
||||
let displayed_fields =
|
||||
["title", "album", "artist", "genre", "country", "released", "duration"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields =
|
||||
["title", "album", "artist"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let faceted_fields =
|
||||
["genre", "country", "artist"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_filterable_fields(faceted_fields);
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
index
|
||||
setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
)
|
||||
},
|
||||
move |index| {
|
||||
let config = IndexerConfig::default();
|
||||
@ -211,30 +305,21 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
||||
|
||||
fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(10);
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Indexing songs without any facets", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let index = setup_index();
|
||||
let primary_key = "id";
|
||||
let searchable_fields = ["title", "album", "artist"];
|
||||
let filterable_fields = [];
|
||||
let sortable_fields = [];
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
builder.set_primary_key("id".to_owned());
|
||||
let displayed_fields =
|
||||
["title", "album", "artist", "genre", "country", "released", "duration"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields =
|
||||
["title", "album", "artist"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
index
|
||||
setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
)
|
||||
},
|
||||
move |index| {
|
||||
let config = IndexerConfig::default();
|
||||
@ -257,29 +342,21 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
||||
|
||||
fn indexing_wiki(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(10);
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Indexing wiki", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let index = setup_index();
|
||||
let primary_key = "id";
|
||||
let searchable_fields = ["title", "body"];
|
||||
let filterable_fields = [];
|
||||
let sortable_fields = [];
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
builder.set_primary_key("id".to_owned());
|
||||
let displayed_fields =
|
||||
["title", "body", "url"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields = ["title", "body"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
// there is NO faceted fields at all
|
||||
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
index
|
||||
setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
)
|
||||
},
|
||||
move |index| {
|
||||
let config = IndexerConfig::default();
|
||||
@ -301,28 +378,81 @@ fn indexing_wiki(c: &mut Criterion) {
|
||||
});
|
||||
}
|
||||
|
||||
fn deleting_wiki_in_batches_default(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Deleting wiki in batches with default settings", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let primary_key = "id";
|
||||
let searchable_fields = ["title", "body"];
|
||||
let filterable_fields = [];
|
||||
let sortable_fields = [];
|
||||
|
||||
let index = setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
);
|
||||
|
||||
// We index only one half of the dataset in the setup part
|
||||
// as we don't care about the time it takes.
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let indexing_config =
|
||||
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||
let mut builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
let count = 1250;
|
||||
let batch_size = 250;
|
||||
let document_ids_to_delete =
|
||||
choose_document_ids_from_index_batched(&index, count, batch_size);
|
||||
|
||||
(index, document_ids_to_delete)
|
||||
},
|
||||
move |(index, document_ids_to_delete)| {
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
for ids in document_ids_to_delete {
|
||||
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
builder.delete_documents(&ids);
|
||||
builder.execute().unwrap();
|
||||
}
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
},
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(10);
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Indexing wiki in three batches", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let index = setup_index();
|
||||
let primary_key = "id";
|
||||
let searchable_fields = ["title", "body"];
|
||||
let filterable_fields = [];
|
||||
let sortable_fields = [];
|
||||
|
||||
let index = setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
);
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
builder.set_primary_key("id".to_owned());
|
||||
let displayed_fields =
|
||||
["title", "body", "url"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields = ["title", "body"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
// there is NO faceted fields at all
|
||||
builder.execute(|_| ()).unwrap();
|
||||
|
||||
// We index only one half of the dataset in the setup part
|
||||
// as we don't care about the time it takes.
|
||||
@ -376,34 +506,21 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
|
||||
fn indexing_movies_default(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(10);
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Indexing movies with default settings", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let index = setup_index();
|
||||
let primary_key = "id";
|
||||
let searchable_fields = ["title", "overview"];
|
||||
let filterable_fields = ["released_date", "genres"];
|
||||
let sortable_fields = [];
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
builder.set_primary_key("id".to_owned());
|
||||
let displayed_fields = ["title", "poster", "overview", "release_date", "genres"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields =
|
||||
["title", "overview"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let faceted_fields =
|
||||
["released_date", "genres"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_filterable_fields(faceted_fields);
|
||||
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
index
|
||||
setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
)
|
||||
},
|
||||
move |index| {
|
||||
let config = IndexerConfig::default();
|
||||
@ -424,35 +541,80 @@ fn indexing_movies_default(c: &mut Criterion) {
|
||||
});
|
||||
}
|
||||
|
||||
fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Deleting movies in batches with default settings", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let primary_key = "id";
|
||||
let searchable_fields = ["title", "overview"];
|
||||
let filterable_fields = ["released_date", "genres"];
|
||||
let sortable_fields = [];
|
||||
|
||||
let index = setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
);
|
||||
|
||||
// We index only one half of the dataset in the setup part
|
||||
// as we don't care about the time it takes.
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||
builder.add_documents(documents).unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
let count = 1250;
|
||||
let batch_size = 250;
|
||||
let document_ids_to_delete =
|
||||
choose_document_ids_from_index_batched(&index, count, batch_size);
|
||||
|
||||
(index, document_ids_to_delete)
|
||||
},
|
||||
move |(index, document_ids_to_delete)| {
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
for ids in document_ids_to_delete {
|
||||
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
builder.delete_documents(&ids);
|
||||
builder.execute().unwrap();
|
||||
}
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
},
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(10);
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Indexing movies in three batches", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let index = setup_index();
|
||||
let primary_key = "id";
|
||||
let searchable_fields = ["title", "overview"];
|
||||
let filterable_fields = ["released_date", "genres"];
|
||||
let sortable_fields = [];
|
||||
|
||||
let index = setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
);
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
builder.set_primary_key("id".to_owned());
|
||||
let displayed_fields = ["title", "poster", "overview", "release_date", "genres"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields =
|
||||
["title", "overview"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let faceted_fields =
|
||||
["released_date", "genres"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_filterable_fields(faceted_fields);
|
||||
|
||||
builder.execute(|_| ()).unwrap();
|
||||
|
||||
// We index only one half of the dataset in the setup part
|
||||
// as we don't care about the time it takes.
|
||||
let config = IndexerConfig::default();
|
||||
@ -500,17 +662,11 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
|
||||
fn indexing_nested_movies_default(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(10);
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Indexing nested movies with default settings", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let index = setup_index();
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
builder.set_primary_key("id".to_owned());
|
||||
let primary_key = "id";
|
||||
let searchable_fields = [
|
||||
"title",
|
||||
"overview",
|
||||
@ -519,12 +675,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
||||
"crew.name",
|
||||
"cast.character",
|
||||
"cast.name",
|
||||
]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
];
|
||||
let filterable_fields = [
|
||||
"popularity",
|
||||
"release_date",
|
||||
@ -540,21 +691,15 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
||||
"crew.name",
|
||||
"cast.character",
|
||||
"cast.name",
|
||||
]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_filterable_fields(filterable_fields);
|
||||
];
|
||||
let sortable_fields = ["popularity", "runtime", "vote_average", "release_date"];
|
||||
|
||||
let sortable_fields = ["popularity", "runtime", "vote_average", "release_date"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_sortable_fields(sortable_fields);
|
||||
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
index
|
||||
setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
)
|
||||
},
|
||||
move |index| {
|
||||
let config = IndexerConfig::default();
|
||||
@ -575,19 +720,13 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
||||
});
|
||||
}
|
||||
|
||||
fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
||||
fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(10);
|
||||
group.bench_function("Indexing nested movies without any facets", |b| {
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Deleting nested movies in batches with default settings", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let index = setup_index();
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
builder.set_primary_key("id".to_owned());
|
||||
let primary_key = "id";
|
||||
let searchable_fields = [
|
||||
"title",
|
||||
"overview",
|
||||
@ -596,14 +735,94 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
||||
"crew.name",
|
||||
"cast.character",
|
||||
"cast.name",
|
||||
]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
builder.execute(|_| ()).unwrap();
|
||||
];
|
||||
let filterable_fields = [
|
||||
"popularity",
|
||||
"release_date",
|
||||
"runtime",
|
||||
"vote_average",
|
||||
"external_ids",
|
||||
"keywords",
|
||||
"providers.buy.name",
|
||||
"providers.rent.name",
|
||||
"providers.flatrate.name",
|
||||
"provider_names",
|
||||
"genres",
|
||||
"crew.name",
|
||||
"cast.character",
|
||||
"cast.name",
|
||||
];
|
||||
let sortable_fields = ["popularity", "runtime", "vote_average", "release_date"];
|
||||
|
||||
let index = setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
);
|
||||
|
||||
// We index only one half of the dataset in the setup part
|
||||
// as we don't care about the time it takes.
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
||||
builder.add_documents(documents).unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
index
|
||||
|
||||
let count = 1250;
|
||||
let batch_size = 250;
|
||||
let document_ids_to_delete =
|
||||
choose_document_ids_from_index_batched(&index, count, batch_size);
|
||||
|
||||
(index, document_ids_to_delete)
|
||||
},
|
||||
move |(index, document_ids_to_delete)| {
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
for ids in document_ids_to_delete {
|
||||
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
builder.delete_documents(&ids);
|
||||
builder.execute().unwrap();
|
||||
}
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
},
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Indexing nested movies without any facets", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let primary_key = "id";
|
||||
let searchable_fields = [
|
||||
"title",
|
||||
"overview",
|
||||
"provider_names",
|
||||
"genres",
|
||||
"crew.name",
|
||||
"cast.character",
|
||||
"cast.name",
|
||||
];
|
||||
let filterable_fields = [];
|
||||
let sortable_fields = [];
|
||||
|
||||
setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
)
|
||||
},
|
||||
move |index| {
|
||||
let config = IndexerConfig::default();
|
||||
@ -626,39 +845,21 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
||||
|
||||
fn indexing_geo(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(10);
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Indexing geo_point", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let index = setup_index();
|
||||
let primary_key = "geonameid";
|
||||
let searchable_fields = ["name", "alternatenames", "elevation"];
|
||||
let filterable_fields = ["_geo", "population", "elevation"];
|
||||
let sortable_fields = ["_geo", "population", "elevation"];
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
builder.set_primary_key("geonameid".to_owned());
|
||||
let displayed_fields =
|
||||
["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields =
|
||||
["name", "alternatenames", "elevation"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let filterable_fields =
|
||||
["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_filterable_fields(filterable_fields);
|
||||
|
||||
let sortable_fields =
|
||||
["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_sortable_fields(sortable_fields);
|
||||
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
index
|
||||
setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
)
|
||||
},
|
||||
move |index| {
|
||||
let config = IndexerConfig::default();
|
||||
@ -680,18 +881,78 @@ fn indexing_geo(c: &mut Criterion) {
|
||||
});
|
||||
}
|
||||
|
||||
fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("indexing");
|
||||
group.sample_size(BENCHMARK_ITERATION);
|
||||
group.bench_function("Deleting geo_point in batches with default settings", |b| {
|
||||
b.iter_with_setup(
|
||||
move || {
|
||||
let primary_key = "geonameid";
|
||||
let searchable_fields = ["name", "alternatenames", "elevation"];
|
||||
let filterable_fields = ["_geo", "population", "elevation"];
|
||||
let sortable_fields = ["_geo", "population", "elevation"];
|
||||
|
||||
let index = setup_index_with_settings(
|
||||
&primary_key,
|
||||
&searchable_fields,
|
||||
&filterable_fields,
|
||||
&sortable_fields,
|
||||
);
|
||||
|
||||
// We index only one half of the dataset in the setup part
|
||||
// as we don't care about the time it takes.
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "json");
|
||||
builder.add_documents(documents).unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
let count = 1250;
|
||||
let batch_size = 250;
|
||||
let document_ids_to_delete =
|
||||
choose_document_ids_from_index_batched(&index, count, batch_size);
|
||||
|
||||
(index, document_ids_to_delete)
|
||||
},
|
||||
move |(index, document_ids_to_delete)| {
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
for ids in document_ids_to_delete {
|
||||
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
builder.delete_documents(&ids);
|
||||
builder.execute().unwrap();
|
||||
}
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
},
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
indexing_songs_default,
|
||||
deleting_songs_in_batches_default,
|
||||
indexing_songs_without_faceted_numbers,
|
||||
indexing_songs_without_faceted_fields,
|
||||
indexing_songs_in_three_batches_default,
|
||||
indexing_wiki,
|
||||
deleting_wiki_in_batches_default,
|
||||
indexing_wiki_in_three_batches,
|
||||
indexing_movies_default,
|
||||
deleting_movies_in_batches_default,
|
||||
indexing_movies_in_three_batches,
|
||||
indexing_nested_movies_default,
|
||||
deleting_nested_movies_in_batches_default,
|
||||
indexing_nested_movies_without_faceted_fields,
|
||||
indexing_geo
|
||||
indexing_geo,
|
||||
deleting_geo_in_batches_default
|
||||
);
|
||||
criterion_main!(benches);
|
||||
|
Loading…
x
Reference in New Issue
Block a user