mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 13:10:06 +01:00
uses an env variable to find the datasets
This commit is contained in:
parent
4969abeaab
commit
3c84075d2d
@ -13,3 +13,15 @@ You can run the following command from the root of this git repository
|
||||
```
|
||||
wget https://meili-datasets.s3.fr-par.scw.cloud/benchmarks/smol-songs.csv.gz -O milli/benches/smol-songs.csv.gz
|
||||
```
|
||||
|
||||
- To run all the benchmarks we recommend using `cargo bench`; this should take around 4h
|
||||
- You can also run the benchmarks on the `songs` dataset with `cargo bench --bench songs`; it should take around 1h
|
||||
- And on the `wiki` dataset with `cargo bench --bench wiki`; it should take around 3h
|
||||
|
||||
By default, the benchmarks expect the datasets to be uncompressed and present in `milli/milli/benches`, but you can also specify your own path with the environment variable `MILLI_BENCH_DATASETS_PATH`, like this:
|
||||
```
|
||||
MILLI_BENCH_DATASETS_PATH=~/Downloads/datasets cargo bench --bench songs
|
||||
```
|
||||
|
||||
Our benchmarking suite uses Criterion, which allows you to do a lot of configuration; see the documentation [here](https://bheisler.github.io/criterion.rs/book/user_guide/user_guide.html)
|
||||
|
||||
|
@ -7,6 +7,15 @@ use milli::{
|
||||
FacetCondition, Index,
|
||||
};
|
||||
|
||||
/// The name of the environment variable used to select the path
|
||||
/// of the directory containing the datasets
|
||||
const BASE_DATASETS_PATH_KEY: &str = "MILLI_BENCH_DATASETS_PATH";
|
||||
|
||||
/// The default path for the dataset if nothing is specified
|
||||
/// By default we choose `milli/benches` because any cargo command run in `milli/milli/**` will be
|
||||
/// executed with a pwd of `milli/milli`
|
||||
const DEFAULT_DATASETS_PATH: &str = "milli/benches";
|
||||
|
||||
pub struct Conf<'a> {
|
||||
/// where we are going to create our database.mmdb directory
|
||||
/// each benchmark will first try to delete it and then recreate it
|
||||
@ -78,7 +87,10 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
builder.update_format(UpdateFormat::Csv);
|
||||
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
||||
// when called from cargo, the current directory is supposed to be milli/milli
|
||||
let dataset_path = format!("benches/{}", conf.dataset);
|
||||
let base_dataset_path = std::env::vars()
|
||||
.find(|var| var.0 == BASE_DATASETS_PATH_KEY)
|
||||
.map_or(DEFAULT_DATASETS_PATH.to_owned(), |(_key, value)| value);
|
||||
let dataset_path = format!("{}/{}", base_dataset_path, conf.dataset);
|
||||
let reader = File::open(&dataset_path)
|
||||
.expect(&format!("could not find the dataset in: {}", &dataset_path));
|
||||
builder.execute(reader, |_, _| ()).unwrap();
|
||||
@ -100,7 +112,8 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|
||||
let mut search = index.search(&rtxn);
|
||||
search.query(query).optional_words(conf.optional_words);
|
||||
if let Some(facet_condition) = conf.facet_condition {
|
||||
let facet_condition = FacetCondition::from_str(&rtxn, &index, facet_condition).unwrap();
|
||||
let facet_condition =
|
||||
FacetCondition::from_str(&rtxn, &index, facet_condition).unwrap();
|
||||
search.facet_condition(facet_condition);
|
||||
}
|
||||
let _ids = search.execute().unwrap();
|
||||
|
Loading…
x
Reference in New Issue
Block a user