MeiliSearch/benchmarks/benches/utils.rs

use std::fs::{create_dir_all, remove_dir_all, File};
use std::path::Path;

use criterion::BenchmarkId;
use heed::EnvOpenOptions;
use milli::{
    update::{IndexDocumentsMethod, Settings, UpdateBuilder, UpdateFormat},
    FilterCondition, Index,
};

/// Configuration of one benchmark group: which dataset to index, how to configure
/// the index, and which queries to run against it.
pub struct Conf<'a> {
    /// Directory in which the database is created.
    /// `base_setup` first tries to delete it and then recreates it.
    pub database_name: &'a str,
    /// Path of the dataset to index; it must be an uncompressed CSV file.
    pub dataset: &'a str,
    /// Label appended to the dataset file name to build the benchmark group name.
    pub group_name: &'a str,
    /// Queries to benchmark; each one is registered as its own benchmark in the group.
    pub queries: &'a [&'a str],
    /// Here you can change which criteria are used and in which order.
    /// - `Some(..)`: the filterable fields, criteria and stop words are reset
    ///   before the given criteria are set
    /// - `None`: the default configuration is kept
    pub criterion: Option<&'a [&'a str]>,
    /// The last chance to configure the database as you want: called on the
    /// settings builder after `criterion` was handled and before the settings
    /// are executed.
    pub configure: fn(&mut Settings),
    /// Optional filter expression; when set it is parsed with
    /// `FilterCondition::from_str` and applied to every search.
    pub filter: Option<&'a str>,
    /// Enables or disables the optional words on the query.
    pub optional_words: bool,
    /// Primary key of the documents; if `None`, docids are auto-generated for
    /// every document.
    pub primary_key: Option<&'a str>,
}

impl Conf<'_> {
    pub const BASE: Self = Conf {
        database_name: "benches.mmdb",
        dataset: "",
        group_name: "",
        queries: &[],
        criterion: None,
        configure: |_| (),
        filter: None,
        optional_words: true,
        primary_key: None,
    };
}

/// Builds a fresh index for one benchmark configuration.
///
/// Steps, in order:
/// 1. delete the `conf.database_name` directory (a missing directory is fine)
///    and recreate it;
/// 2. open an [`Index`] in it;
/// 3. store `conf.primary_key` if one is given;
/// 4. apply the settings (`conf.criterion`, then `conf.configure`);
/// 5. index the CSV file found at `conf.dataset`, replacing existing documents.
///
/// # Panics
///
/// Panics if the directory cannot be removed or created, if the dataset cannot
/// be opened, or if any index operation fails.
pub fn base_setup(conf: &Conf) -> Index {
    match remove_dir_all(conf.database_name) {
        Ok(_) => (),
        // nothing to clean up on the first run
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => (),
        Err(e) => panic!("{}", e),
    }
    create_dir_all(conf.database_name).unwrap();

    let mut options = EnvOpenOptions::new();
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
    options.max_readers(10);
    let index = Index::new(options, conf.database_name).unwrap();

    if let Some(primary_key) = conf.primary_key {
        let mut wtxn = index.write_txn().unwrap();
        index.put_primary_key(&mut wtxn, primary_key).unwrap();
        // The transaction must be committed explicitly: dropping it at the end
        // of this block would discard the primary key we just wrote.
        wtxn.commit().unwrap();
    }

    // First update: the settings.
    let update_builder = UpdateBuilder::new(0);
    let mut wtxn = index.write_txn().unwrap();
    let mut builder = update_builder.settings(&mut wtxn, &index);

    if let Some(criterion) = conf.criterion {
        // Start from a clean slate before applying the requested criteria.
        builder.reset_filterable_fields();
        builder.reset_criteria();
        builder.reset_stop_words();

        let criterion = criterion.iter().map(|s| s.to_string()).collect();
        builder.set_criteria(criterion);
    }

    // Let the caller tweak the settings last, so it can override what was set above.
    (conf.configure)(&mut builder);

    builder.execute(|_, _| ()).unwrap();
    wtxn.commit().unwrap();

    // Second update: the documents.
    let update_builder = UpdateBuilder::new(0);
    let mut wtxn = index.write_txn().unwrap();
    let mut builder = update_builder.index_documents(&mut wtxn, &index);
    if conf.primary_key.is_none() {
        builder.enable_autogenerate_docids();
    }
    builder.update_format(UpdateFormat::Csv);
    builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
    // Build the panic message lazily, only when opening the dataset actually fails.
    let reader = File::open(conf.dataset).unwrap_or_else(|e| {
        panic!("could not find the dataset in: {}: {:?}", conf.dataset, e)
    });
    builder.execute(reader, |_, _| ()).unwrap();
    wtxn.commit().unwrap();

    index
}

/// Runs every configuration of `confs` as its own criterion benchmark group.
///
/// For each configuration the index is rebuilt with `base_setup`, a group named
/// `"<dataset file name>: <group_name>"` is created, and every query of the
/// configuration is benchmarked in it. One measured iteration opens a read
/// transaction, prepares the search (query, optional words and, if configured,
/// the filter) and executes it.
///
/// # Panics
///
/// Panics if the dataset path has no UTF-8 file name, or if opening the read
/// transaction, parsing the filter or executing the search fails.
pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
    for conf in confs {
        let index = base_setup(conf);

        // Only the file name of the dataset is used, to keep the group names short.
        let dataset_file = Path::new(conf.dataset)
            .file_name()
            .and_then(|name| name.to_str())
            .unwrap();
        let group_title = format!("{}: {}", dataset_file, conf.group_name);
        let mut group = c.benchmark_group(&group_title);

        for &query in conf.queries {
            let bench_id = BenchmarkId::from_parameter(query);
            group.bench_with_input(bench_id, &query, |bencher, &query| {
                bencher.iter(|| {
                    let rtxn = index.read_txn().unwrap();
                    let mut search = index.search(&rtxn);
                    search.query(query).optional_words(conf.optional_words);
                    if let Some(expression) = conf.filter {
                        let condition =
                            FilterCondition::from_str(&rtxn, &index, expression).unwrap();
                        search.filter(condition);
                    }
                    let _ids = search.execute().unwrap();
                });
            });
        }
        group.finish();
    }
}
fix the facets conditions 2021-04-14 16:26:21 +02:00			`use std::fs::{create_dir_all, remove_dir_all, File};`
Reduce the length of the benchmarks names 2021-06-03 15:59:43 +02:00			`use std::path::Path;`
push a first version of the benchmark for the typo 2021-04-01 18:54:14 +02:00
merge all the criterion only benchmarks in one file 2021-04-07 11:50:38 +02:00			`use criterion::BenchmarkId;`
reformat all the files 2021-04-14 13:13:33 +02:00			`use heed::EnvOpenOptions;`
			`use milli::{`
			`update::{IndexDocumentsMethod, Settings, UpdateBuilder, UpdateFormat},`
Fix the benchmarks compilation 2021-06-03 10:33:42 +02:00			`FilterCondition, Index,`
reformat all the files 2021-04-14 13:13:33 +02:00			`};`
push a first version of the benchmark for the typo 2021-04-01 18:54:14 +02:00
merge all the criterion only benchmarks in one file 2021-04-07 11:50:38 +02:00			`pub struct Conf<'a> {`
add a bunch of queries and start the introduction of the filters and the new dataset 2021-04-13 10:44:27 +02:00			`/// where we are going to create our database.mmdb directory`
			`/// each benchmark will first try to delete it and then recreate it`
			`pub database_name: &'a str,`
			`/// the dataset to be used, it must be an uncompressed csv`
			`pub dataset: &'a str,`
merge all the criterion only benchmarks in one file 2021-04-07 11:50:38 +02:00			`pub group_name: &'a str,`
reformat all the files 2021-04-14 13:13:33 +02:00			`pub queries: &'a [&'a str],`
add the configuration of the searchable fields and displayed fields and a default configuration for the songs 2021-04-13 11:40:16 +02:00			`/// here you can change which criterion are used and in which order.`
			`/// - if you specify something all the base configuration will be thrown out`
			`/// - if you don't specify anything (None) the default configuration will be kept`
merge all the criterion only benchmarks in one file 2021-04-07 11:50:38 +02:00			`pub criterion: Option<&'a [&'a str]>,`
add the configuration of the searchable fields and displayed fields and a default configuration for the songs 2021-04-13 11:40:16 +02:00			`/// the last chance to configure your database as you want`
			`pub configure: fn(&mut Settings),`
Fix the benchmarks compilation 2021-06-03 10:33:42 +02:00			`pub filter: Option<&'a str>,`
add the configuration of the searchable fields and displayed fields and a default configuration for the songs 2021-04-13 11:40:16 +02:00			`/// enable or disable the optional words on the query`
merge all the criterion only benchmarks in one file 2021-04-07 11:50:38 +02:00			`pub optional_words: bool,`
move the benchmarks to another crate so we can download the datasets automatically without adding overhead to the build of milli 2021-05-25 17:09:14 +02:00			`/// primary key, if there is None we'll auto-generate docids for every documents`
			`pub primary_key: Option<&'a str>,`
merge all the criterion only benchmarks in one file 2021-04-07 11:50:38 +02:00			`}`

add a bunch of queries and start the introduction of the filters and the new dataset 2021-04-13 10:44:27 +02:00			`impl Conf<'_> {`
			`pub const BASE: Self = Conf {`
			`database_name: "benches.mmdb",`
			`dataset: "",`
			`group_name: "",`
			`queries: &[],`
			`criterion: None,`
remove the nop function 2021-06-02 17:09:21 +02:00			`configure: \|_\| (),`
Fix the benchmarks compilation 2021-06-03 10:33:42 +02:00			`filter: None,`
add a bunch of queries and start the introduction of the filters and the new dataset 2021-04-13 10:44:27 +02:00			`optional_words: true,`
move the benchmarks to another crate so we can download the datasets automatically without adding overhead to the build of milli 2021-05-25 17:09:14 +02:00			`primary_key: None,`
add a bunch of queries and start the introduction of the filters and the new dataset 2021-04-13 10:44:27 +02:00			`};`
			`}`

add the configuration of the searchable fields and displayed fields and a default configuration for the songs 2021-04-13 11:40:16 +02:00			`pub fn base_setup(conf: &Conf) -> Index {`
			`match remove_dir_all(&conf.database_name) {`
add a bunch of queries and start the introduction of the filters and the new dataset 2021-04-13 10:44:27 +02:00			`Ok(_) => (),`
			`Err(e) if e.kind() == std::io::ErrorKind::NotFound => (),`
			`Err(e) => panic!("{}", e),`
			`}`
add the configuration of the searchable fields and displayed fields and a default configuration for the songs 2021-04-13 11:40:16 +02:00			`create_dir_all(&conf.database_name).unwrap();`
push a first version of the benchmark for the typo 2021-04-01 18:54:14 +02:00
			`let mut options = EnvOpenOptions::new();`
			`options.map_size(100 * 1024 * 1024 * 1024); // 100 GB`
			`options.max_readers(10);`
add the configuration of the searchable fields and displayed fields and a default configuration for the songs 2021-04-13 11:40:16 +02:00			`let index = Index::new(options, conf.database_name).unwrap();`
add a way to provide primary_key or autogenerate documents ids 2021-05-25 17:55:45 +02:00			`if let Some(primary_key) = conf.primary_key {`
			`let mut wtxn = index.write_txn().unwrap();`
			`index.put_primary_key(&mut wtxn, primary_key).unwrap();`
			`}`
push a first version of the benchmark for the typo 2021-04-01 18:54:14 +02:00
			`let update_builder = UpdateBuilder::new(0);`
			`let mut wtxn = index.write_txn().unwrap();`
			`let mut builder = update_builder.settings(&mut wtxn, &index);`

add the configuration of the searchable fields and displayed fields and a default configuration for the songs 2021-04-13 11:40:16 +02:00			`if let Some(criterion) = conf.criterion {`
Fix the benchmarks compilation 2021-06-03 10:33:42 +02:00			`builder.reset_filterable_fields();`
push a first version of the benchmark for the typo 2021-04-01 18:54:14 +02:00			`builder.reset_criteria();`
			`builder.reset_stop_words();`

add the configuration of the searchable fields and displayed fields and a default configuration for the songs 2021-04-13 11:40:16 +02:00			`let criterion = criterion.iter().map(\|s\| s.to_string()).collect();`
merge all the criterion only benchmarks in one file 2021-04-07 11:50:38 +02:00			`builder.set_criteria(criterion);`
push a first version of the benchmark for the typo 2021-04-01 18:54:14 +02:00			`}`

add the configuration of the searchable fields and displayed fields and a default configuration for the songs 2021-04-13 11:40:16 +02:00			`(conf.configure)(&mut builder);`

push a first version of the benchmark for the typo 2021-04-01 18:54:14 +02:00			`builder.execute(\|_, _\| ()).unwrap();`
			`wtxn.commit().unwrap();`

			`let update_builder = UpdateBuilder::new(0);`
			`let mut wtxn = index.write_txn().unwrap();`
			`let mut builder = update_builder.index_documents(&mut wtxn, &index);`
add a way to provide primary_key or autogenerate documents ids 2021-05-25 17:55:45 +02:00			`if let None = conf.primary_key {`
			`builder.enable_autogenerate_docids();`
			`}`
push a first version of the benchmark for the typo 2021-04-01 18:54:14 +02:00			`builder.update_format(UpdateFormat::Csv);`
			`builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);`
move the benchmarks to another crate so we can download the datasets automatically without adding overhead to the build of milli 2021-05-25 17:09:14 +02:00			`let reader = File::open(conf.dataset)`
			`.expect(&format!("could not find the dataset in: {}", conf.dataset));`
push a first version of the benchmark for the typo 2021-04-01 18:54:14 +02:00			`builder.execute(reader, \|_, _\| ()).unwrap();`
			`wtxn.commit().unwrap();`

			`index`
			`}`
merge all the criterion only benchmarks in one file 2021-04-07 11:50:38 +02:00
			`pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {`
			`for conf in confs {`
add the configuration of the searchable fields and displayed fields and a default configuration for the songs 2021-04-13 11:40:16 +02:00			`let index = base_setup(conf);`
merge all the criterion only benchmarks in one file 2021-04-07 11:50:38 +02:00
Reduce the length of the benchmarks names 2021-06-03 15:59:43 +02:00			`let file_name = Path::new(conf.dataset).file_name().and_then(\|f\| f.to_str()).unwrap();`
			`let name = format!("{}: {}", file_name, conf.group_name);`
			`let mut group = c.benchmark_group(&name);`
merge all the criterion only benchmarks in one file 2021-04-07 11:50:38 +02:00
			`for &query in conf.queries {`
			`group.bench_with_input(BenchmarkId::from_parameter(query), &query, \|b, &query\| {`
			`b.iter(\|\| {`
			`let rtxn = index.read_txn().unwrap();`
add a bunch of queries and start the introduction of the filters and the new dataset 2021-04-13 10:44:27 +02:00			`let mut search = index.search(&rtxn);`
			`search.query(query).optional_words(conf.optional_words);`
Fix the benchmarks compilation 2021-06-03 10:33:42 +02:00			`if let Some(filter) = conf.filter {`
Reduce the length of the benchmarks names 2021-06-03 15:59:43 +02:00			`let filter = FilterCondition::from_str(&rtxn, &index, filter).unwrap();`
Fix the benchmarks compilation 2021-06-03 10:33:42 +02:00			`search.filter(filter);`
add a bunch of queries and start the introduction of the filters and the new dataset 2021-04-13 10:44:27 +02:00			`}`
			`let _ids = search.execute().unwrap();`
merge all the criterion only benchmarks in one file 2021-04-07 11:50:38 +02:00			`});`
			`});`
			`}`
			`group.finish();`
			`}`
			`}`