mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 05:54:30 +01:00
push a first version of the benchmark for the typo
This commit is contained in:
parent
270da98c46
commit
4fdbfd6048
@ -61,5 +61,5 @@ rand = "0.8.3"
|
||||
default = []
|
||||
|
||||
[[bench]]
|
||||
name = "search"
|
||||
name = "typo"
|
||||
harness = false
|
||||
|
8
milli/benches/README.md
Normal file
8
milli/benches/README.md
Normal file
@ -0,0 +1,8 @@
|
||||
Benchmarks
|
||||
==========
|
||||
|
||||
For our benchmark we are using a small subset of the dataset songs.csv. It was generated with this command:
|
||||
```
|
||||
xsv sample --seed 42 songs.csv -o smol_songs.csv
|
||||
```
|
||||
The original songs.csv dataset is available [here](https://meili-datasets.s3.fr-par.scw.cloud/songs.csv.gz).
|
@ -1,22 +1,27 @@
|
||||
use std::time::Duration;
|
||||
mod utils;
|
||||
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::Index;
|
||||
use std::time::Duration;
|
||||
use criterion::{criterion_group, criterion_main, BenchmarkId};
|
||||
|
||||
fn bench_search(c: &mut criterion::Criterion) {
|
||||
let database = "books-4cpu.mmdb";
|
||||
fn bench_typo(c: &mut criterion::Criterion) {
|
||||
let index = utils::base_setup(Some(vec!["typo".to_string()]));
|
||||
|
||||
let queries = [
|
||||
"minogue kylie",
|
||||
"minogue kylie live",
|
||||
"mongus ",
|
||||
"thelonius monk ",
|
||||
"Disnaylande ",
|
||||
"the white striper ",
|
||||
"indochie ",
|
||||
"indochien ",
|
||||
"klub des loopers ",
|
||||
"fear of the duck ",
|
||||
"michel depech ",
|
||||
"stromal ",
|
||||
"dire straights ",
|
||||
"Arethla Franklin ",
|
||||
];
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
options.max_readers(10);
|
||||
let index = Index::new(options, database).unwrap();
|
||||
|
||||
let mut group = c.benchmark_group("search");
|
||||
let mut group = c.benchmark_group("typo");
|
||||
group.sample_size(10);
|
||||
group.measurement_time(Duration::from_secs(12));
|
||||
|
||||
@ -32,5 +37,5 @@ fn bench_search(c: &mut criterion::Criterion) {
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_search);
|
||||
criterion_group!(benches, bench_typo);
|
||||
criterion_main!(benches);
|
41
milli/benches/utils.rs
Normal file
41
milli/benches/utils.rs
Normal file
@ -0,0 +1,41 @@
|
||||
use std::{fs::{File, create_dir_all}};
|
||||
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::{Index, update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}};
|
||||
|
||||
pub fn base_setup(criteria: Option<Vec<String>>) -> Index {
|
||||
let database = "songs.mmdb";
|
||||
create_dir_all(&database).unwrap();
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
options.max_readers(10);
|
||||
let index = Index::new(options, database).unwrap();
|
||||
|
||||
let update_builder = UpdateBuilder::new(0);
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = update_builder.settings(&mut wtxn, &index);
|
||||
|
||||
if let Some(criteria) = criteria {
|
||||
builder.reset_faceted_fields();
|
||||
builder.reset_criteria();
|
||||
builder.reset_stop_words();
|
||||
|
||||
builder.set_criteria(criteria);
|
||||
}
|
||||
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
let update_builder = UpdateBuilder::new(0);
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = update_builder.index_documents(&mut wtxn, &index);
|
||||
builder.update_format(UpdateFormat::Csv);
|
||||
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
||||
// we called from cargo the current directory is supposed to be milli/milli
|
||||
let reader = File::open("benches/smol_songs.csv").unwrap();
|
||||
builder.execute(reader, |_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user