push a first version of the benchmark for the typo

This commit is contained in:
tamo 2021-04-01 18:54:14 +02:00 committed by Tamo
parent 270da98c46
commit 4fdbfd6048
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
4 changed files with 69 additions and 15 deletions

View File

@ -61,5 +61,5 @@ rand = "0.8.3"
default = []
[[bench]]
name = "search"
name = "typo"
harness = false

8
milli/benches/README.md Normal file
View File

@ -0,0 +1,8 @@
Benchmarks
==========
Our benchmarks use a small subset of the `songs.csv` dataset. It was generated with the following command:
```
xsv sample --seed 42 songs.csv -o smol_songs.csv
```
The original songs.csv dataset is available [here](https://meili-datasets.s3.fr-par.scw.cloud/songs.csv.gz).

View File

@ -1,22 +1,27 @@
use std::time::Duration;
mod utils;
use heed::EnvOpenOptions;
use milli::Index;
use std::time::Duration;
use criterion::{criterion_group, criterion_main, BenchmarkId};
fn bench_search(c: &mut criterion::Criterion) {
let database = "books-4cpu.mmdb";
fn bench_typo(c: &mut criterion::Criterion) {
let index = utils::base_setup(Some(vec!["typo".to_string()]));
let queries = [
"minogue kylie",
"minogue kylie live",
"mongus ",
"thelonius monk ",
"Disnaylande ",
"the white striper ",
"indochie ",
"indochien ",
"klub des loopers ",
"fear of the duck ",
"michel depech ",
"stromal ",
"dire straights ",
"Arethla Franklin ",
];
let mut options = EnvOpenOptions::new();
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
options.max_readers(10);
let index = Index::new(options, database).unwrap();
let mut group = c.benchmark_group("search");
let mut group = c.benchmark_group("typo");
group.sample_size(10);
group.measurement_time(Duration::from_secs(12));
@ -32,5 +37,5 @@ fn bench_search(c: &mut criterion::Criterion) {
group.finish();
}
criterion_group!(benches, bench_search);
criterion_group!(benches, bench_typo);
criterion_main!(benches);

41
milli/benches/utils.rs Normal file
View File

@ -0,0 +1,41 @@
use std::{fs::{File, create_dir_all}};
use heed::EnvOpenOptions;
use milli::{Index, update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}};
/// Opens the `songs.mmdb` index used by the benchmarks, configures it and
/// fills it with the documents from `benches/smol_songs.csv`.
///
/// The database directory is created when it does not exist yet. The work is
/// then done in two separate write transactions:
///
/// 1. a settings update — when `criteria` is `Some`, the faceted fields, the
///    criteria and the stop words are reset and the given criteria are set;
///    when it is `None`, the settings update is executed without any change;
/// 2. a document import of the CSV file, replacing documents.
///
/// # Panics
///
/// Every fallible step is unwrapped, so any failure (directory creation,
/// opening the index, a transaction, opening the CSV file, indexing) panics.
pub fn base_setup(criteria: Option<Vec<String>>) -> Index {
    let db_path = "songs.mmdb";
    create_dir_all(&db_path).unwrap();

    let mut env_options = EnvOpenOptions::new();
    env_options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
    env_options.max_readers(10);
    let index = Index::new(env_options, db_path).unwrap();

    // First transaction: apply the settings.
    {
        let settings_update = UpdateBuilder::new(0);
        let mut txn = index.write_txn().unwrap();
        let mut settings = settings_update.settings(&mut txn, &index);
        if let Some(ranking_rules) = criteria {
            settings.reset_faceted_fields();
            settings.reset_criteria();
            settings.reset_stop_words();
            settings.set_criteria(ranking_rules);
        }
        settings.execute(|_, _| ()).unwrap();
        txn.commit().unwrap();
    }

    // Second transaction: import the documents.
    {
        let documents_update = UpdateBuilder::new(0);
        let mut txn = index.write_txn().unwrap();
        let mut indexing = documents_update.index_documents(&mut txn, &index);
        indexing.update_format(UpdateFormat::Csv);
        indexing.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
        // When invoked through cargo, the current directory is expected to be
        // milli/milli, hence the relative path below.
        let csv_file = File::open("benches/smol_songs.csv").unwrap();
        indexing.execute(csv_file, |_, _| ()).unwrap();
        txn.commit().unwrap();
    }

    index
}