Mirror of https://github.com/meilisearch/MeiliSearch, commit 521df85c0d

@@ -42,6 +42,8 @@ nightly = []
csv = "1.0"
elapsed = "0.1"
quickcheck = "0.7"
rand = "0.6"
rand_xorshift = "0.1"
structopt = "0.2"
tempfile = "3.0"
termcolor = "1.0"

@@ -318,3 +318,321 @@ mod tests {
        Ok(dir.close()?)
    }
}

#[cfg(all(feature = "nightly", test))]
mod bench {
    extern crate test;

    use super::*;
    use std::error::Error;
    use std::iter::repeat_with;
    use self::test::Bencher;

    use rand::distributions::Alphanumeric;
    use rand_xorshift::XorShiftRng;
    use rand::{Rng, SeedableRng};
    use rand::seq::SliceRandom;
    use serde_derive::Serialize;

    use crate::tokenizer::DefaultBuilder;
    use crate::database::update::UpdateBuilder;
    use crate::database::schema::*;

    fn random_sentences<R: Rng>(number: usize, rng: &mut R) -> String {
        let mut words = String::new();

        for i in 0..number {
            let word_len = rng.gen_range(1, 12);
            let iter = repeat_with(|| rng.sample(Alphanumeric)).take(word_len);
            words.extend(iter);

            if i == number - 1 { // last word
                let final_ = [".", "?", "!", "..."].choose(rng).cloned();
                words.extend(final_);
            } else {
                let middle = [",", ", "].choose(rng).cloned();
                words.extend(middle);
            }
        }

        words
    }
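
The `random_sentences` helper above builds the synthetic corpus used by every benchmark in this module: pseudo-words of 1 to 11 alphanumeric characters, separated by commas and closed with a random punctuation mark. The standalone sketch below is not part of the commit; it mirrors the same sampling pattern using only the rand = "0.6" and rand_xorshift = "0.1" crates pinned above, and the fixed seed is what makes every run generate an identical corpus.

```rust
// Sketch only: mirrors the word-sampling pattern of `random_sentences`
// (rand 0.6 / rand_xorshift 0.1 APIs, as pinned in the manifest hunk above).
use std::iter::repeat_with;

use rand::distributions::Alphanumeric;
use rand::{Rng, SeedableRng};
use rand_xorshift::XorShiftRng;

fn main() {
    // Same seed as the benches: the generated words are identical on every run.
    let mut rng = XorShiftRng::seed_from_u64(42);

    for _ in 0..3 {
        // One pseudo-word of 1 to 11 alphanumeric characters.
        let word_len = rng.gen_range(1, 12);
        let word: String = repeat_with(|| rng.sample(Alphanumeric))
            .take(word_len)
            .collect();
        println!("{}", word);
    }
}
```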

    #[bench]
    fn open_little_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
        let dir = tempfile::tempdir()?;

        let mut builder = SchemaBuilder::with_identifier("id");
        builder.new_attribute("title", STORED | INDEXED);
        builder.new_attribute("description", STORED | INDEXED);
        let schema = builder.build();

        let db_path = dir.path().join("bench.mdb");
        let database = Database::create(db_path.clone(), schema.clone())?;

        #[derive(Serialize)]
        struct Document {
            id: u64,
            title: String,
            description: String,
        }

        let path = dir.path().join("update-000.sst");
        let tokenizer_builder = DefaultBuilder;
        let mut builder = UpdateBuilder::new(path, schema.clone());
        let mut rng = XorShiftRng::seed_from_u64(42);

        for i in 0..300 {
            let document = Document {
                id: i,
                title: random_sentences(rng.gen_range(1, 8), &mut rng),
                description: random_sentences(rng.gen_range(20, 200), &mut rng),
            };
            builder.update_document(&document, &tokenizer_builder)?;
        }

        let update = builder.build()?;
        database.ingest_update_file(update)?;

        drop(database);

        bench.iter(|| {
            let database = Database::open(db_path.clone()).unwrap();
            test::black_box(|| database);
        });

        Ok(())
    }
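
The bench above and the two `open_*` variants that follow it share one shape: generate and ingest the documents once, drop the database handle, and let `Bencher::iter` time nothing but the reopen of the on-disk store. Below is a hedged sketch of that shape; `build_store` and `open_store` are hypothetical stand-ins rather than APIs of this crate, and the sketch passes the opened value to `test::black_box` directly where the commit wraps it in a closure first.

```rust
// Sketch of the measurement shape used by the open_* benches (nightly only).
// `build_store` / `open_store` are illustrative stand-ins, not crate APIs.
#![feature(test)]
extern crate test;

use std::fs;
use std::path::Path;

use test::Bencher;

fn build_store(path: &Path) {
    // One-time setup, performed outside the timed region.
    fs::write(path, b"synthetic documents").unwrap();
}

fn open_store(path: &Path) -> Vec<u8> {
    fs::read(path).unwrap()
}

#[bench]
fn open_only(bench: &mut Bencher) {
    let path = std::env::temp_dir().join("bench-shape.store");
    build_store(&path); // not measured

    bench.iter(|| {
        let store = open_store(&path); // the only thing being timed
        test::black_box(store)         // keep the optimizer from eliding the open
    });
}
```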

    #[bench]
    fn open_medium_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
        let dir = tempfile::tempdir()?;

        let mut builder = SchemaBuilder::with_identifier("id");
        builder.new_attribute("title", STORED | INDEXED);
        builder.new_attribute("description", STORED | INDEXED);
        let schema = builder.build();

        let db_path = dir.path().join("bench.mdb");
        let database = Database::create(db_path.clone(), schema.clone())?;

        #[derive(Serialize)]
        struct Document {
            id: u64,
            title: String,
            description: String,
        }

        let path = dir.path().join("update-000.sst");
        let tokenizer_builder = DefaultBuilder;
        let mut builder = UpdateBuilder::new(path, schema.clone());
        let mut rng = XorShiftRng::seed_from_u64(42);

        for i in 0..3000 {
            let document = Document {
                id: i,
                title: random_sentences(rng.gen_range(1, 8), &mut rng),
                description: random_sentences(rng.gen_range(20, 200), &mut rng),
            };
            builder.update_document(&document, &tokenizer_builder)?;
        }

        let update = builder.build()?;
        database.ingest_update_file(update)?;

        drop(database);

        bench.iter(|| {
            let database = Database::open(db_path.clone()).unwrap();
            test::black_box(|| database);
        });

        Ok(())
    }

    #[bench]
    #[ignore]
    fn open_big_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
        let dir = tempfile::tempdir()?;

        let mut builder = SchemaBuilder::with_identifier("id");
        builder.new_attribute("title", STORED | INDEXED);
        builder.new_attribute("description", STORED | INDEXED);
        let schema = builder.build();

        let db_path = dir.path().join("bench.mdb");
        let database = Database::create(db_path.clone(), schema.clone())?;

        #[derive(Serialize)]
        struct Document {
            id: u64,
            title: String,
            description: String,
        }

        let path = dir.path().join("update-000.sst");
        let tokenizer_builder = DefaultBuilder;
        let mut builder = UpdateBuilder::new(path, schema.clone());
        let mut rng = XorShiftRng::seed_from_u64(42);

        for i in 0..30_000 {
            let document = Document {
                id: i,
                title: random_sentences(rng.gen_range(1, 8), &mut rng),
                description: random_sentences(rng.gen_range(20, 200), &mut rng),
            };
            builder.update_document(&document, &tokenizer_builder)?;
        }

        let update = builder.build()?;
        database.ingest_update_file(update)?;

        drop(database);

        bench.iter(|| {
            let database = Database::open(db_path.clone()).unwrap();
            test::black_box(|| database);
        });

        Ok(())
    }

    #[bench]
    fn search_oneletter_little_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
        let dir = tempfile::tempdir()?;

        let mut builder = SchemaBuilder::with_identifier("id");
        builder.new_attribute("title", STORED | INDEXED);
        builder.new_attribute("description", STORED | INDEXED);
        let schema = builder.build();

        let db_path = dir.path().join("bench.mdb");
        let database = Database::create(db_path.clone(), schema.clone())?;

        #[derive(Serialize)]
        struct Document {
            id: u64,
            title: String,
            description: String,
        }

        let path = dir.path().join("update-000.sst");
        let tokenizer_builder = DefaultBuilder;
        let mut builder = UpdateBuilder::new(path, schema.clone());
        let mut rng = XorShiftRng::seed_from_u64(42);

        for i in 0..300 {
            let document = Document {
                id: i,
                title: random_sentences(rng.gen_range(1, 8), &mut rng),
                description: random_sentences(rng.gen_range(20, 200), &mut rng),
            };
            builder.update_document(&document, &tokenizer_builder)?;
        }

        let update = builder.build()?;
        let view = database.ingest_update_file(update)?;

        bench.iter(|| {
            for q in &["a", "b", "c", "d", "e"] {
                let documents = view.query_builder().unwrap().query(q, 0..20);
                test::black_box(|| documents);
            }
        });

        Ok(())
    }
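
In the search benches, the view returned by `ingest_update_file` is built once and reused, and the closure handed to `Bencher::iter` runs all five one-letter queries, so the reported ns/iter figure covers the batch of five rather than a single query. A small sketch of that batching behaviour, with `run_query` as a hypothetical stand-in for `view.query_builder().unwrap().query(..)`:

```rust
// Sketch: when the closure given to `Bencher::iter` loops over several inputs,
// the reported ns/iter is the cost of the whole batch, not of a single call.
#![feature(test)]
extern crate test;

use test::Bencher;

fn run_query(q: &str) -> usize {
    q.len() // stand-in work, not the crate's real search path
}

#[bench]
fn five_queries_per_iteration(bench: &mut Bencher) {
    bench.iter(|| {
        // All five searches are timed together, exactly as in the bench above.
        for q in &["a", "b", "c", "d", "e"] {
            test::black_box(run_query(q));
        }
    });
}
```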

    #[bench]
    fn search_oneletter_medium_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
        let dir = tempfile::tempdir()?;

        let mut builder = SchemaBuilder::with_identifier("id");
        builder.new_attribute("title", STORED | INDEXED);
        builder.new_attribute("description", STORED | INDEXED);
        let schema = builder.build();

        let db_path = dir.path().join("bench.mdb");
        let database = Database::create(db_path.clone(), schema.clone())?;

        #[derive(Serialize)]
        struct Document {
            id: u64,
            title: String,
            description: String,
        }

        let path = dir.path().join("update-000.sst");
        let tokenizer_builder = DefaultBuilder;
        let mut builder = UpdateBuilder::new(path, schema.clone());
        let mut rng = XorShiftRng::seed_from_u64(42);

        for i in 0..3000 {
            let document = Document {
                id: i,
                title: random_sentences(rng.gen_range(1, 8), &mut rng),
                description: random_sentences(rng.gen_range(20, 200), &mut rng),
            };
            builder.update_document(&document, &tokenizer_builder)?;
        }

        let update = builder.build()?;
        let view = database.ingest_update_file(update)?;

        bench.iter(|| {
            for q in &["a", "b", "c", "d", "e"] {
                let documents = view.query_builder().unwrap().query(q, 0..20);
                test::black_box(|| documents);
            }
        });

        Ok(())
    }

    #[bench]
    #[ignore]
    fn search_oneletter_big_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
        let dir = tempfile::tempdir()?;

        let mut builder = SchemaBuilder::with_identifier("id");
        builder.new_attribute("title", STORED | INDEXED);
        builder.new_attribute("description", STORED | INDEXED);
        let schema = builder.build();

        let db_path = dir.path().join("bench.mdb");
        let database = Database::create(db_path.clone(), schema.clone())?;

        #[derive(Serialize)]
        struct Document {
            id: u64,
            title: String,
            description: String,
        }

        let path = dir.path().join("update-000.sst");
        let tokenizer_builder = DefaultBuilder;
        let mut builder = UpdateBuilder::new(path, schema.clone());
        let mut rng = XorShiftRng::seed_from_u64(42);

        for i in 0..30_000 {
            let document = Document {
                id: i,
                title: random_sentences(rng.gen_range(1, 8), &mut rng),
                description: random_sentences(rng.gen_range(20, 200), &mut rng),
            };
            builder.update_document(&document, &tokenizer_builder)?;
        }

        let update = builder.build()?;
        let view = database.ingest_update_file(update)?;

        bench.iter(|| {
            for q in &["a", "b", "c", "d", "e"] {
                let documents = view.query_builder().unwrap().query(q, 0..20);
                test::black_box(|| documents);
            }
        });

        Ok(())
    }
}

@@ -1,3 +1,5 @@
#![cfg_attr(feature = "nightly", feature(test))]

pub mod automaton;
pub mod database;
pub mod data;
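
The attribute added here is the other half of the gating used by the bench modules: `feature(test)` is requested only when the optional `nightly` Cargo feature is enabled, and each bench module is additionally compiled only under `cfg(test)`. A minimal skeleton of that wiring follows (a sketch, not this crate's actual layout; `add` is a hypothetical library item). On a nightly toolchain the suite would presumably be run with something like `cargo +nightly bench --features nightly`.

```rust
// Minimal skeleton of the feature wiring: the crate root only requests
// `feature(test)` when the optional `nightly` Cargo feature is on, and the
// bench module needs both that feature and `cfg(test)` to be compiled at all.
#![cfg_attr(feature = "nightly", feature(test))]

/// Hypothetical library item, standing in for the real crate contents.
pub fn add(a: u64, b: u64) -> u64 {
    a + b
}

#[cfg(all(feature = "nightly", test))]
mod bench {
    extern crate test;

    use self::test::Bencher;
    use super::*;

    #[bench]
    fn bench_add(bench: &mut Bencher) {
        bench.iter(|| test::black_box(add(2, 3)));
    }
}
```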

@@ -121,3 +121,42 @@ mod tests {
        assert_eq!(matches_proximity(matches), 3);
    }
}

#[cfg(all(feature = "nightly", test))]
mod bench {
    extern crate test;

    use super::*;
    use std::error::Error;
    use self::test::Bencher;

    use rand_xorshift::XorShiftRng;
    use rand::{Rng, SeedableRng};

    use crate::Attribute;

    #[bench]
    fn evaluate_proximity(bench: &mut Bencher) -> Result<(), Box<Error>> {
        let number_matches = 30_000;
        let mut matches = Vec::with_capacity(number_matches);
        let mut rng = XorShiftRng::seed_from_u64(42);

        for _ in 0..number_matches {
            let query_index = rng.gen_range(0, 4);

            let attribute = rng.gen_range(0, 5);
            let word_index = rng.gen_range(0, 15);
            let attribute = Attribute::new_faillible(attribute, word_index);

            let match_ = Match { query_index, attribute, ..Match::zero() };
            matches.push(match_);
        }

        bench.iter(|| {
            let proximity = matches_proximity(&matches);
            test::black_box(move || proximity)
        });

        Ok(())
    }
}
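
One small detail in the loop above: `Match { query_index, attribute, ..Match::zero() }` uses struct update syntax, so only the two randomized fields are set explicitly and every remaining field is copied from the zeroed template. A tiny self-contained illustration with a made-up `Sample` type (not the crate's `Match`):

```rust
// Struct update syntax: fields not listed explicitly are taken from the
// expression after `..`. `Sample` is an invented type for illustration only.
#[derive(Debug, Clone, Copy, Default)]
struct Sample {
    query_index: u32,
    attribute: u32,
    is_exact: bool,
}

fn main() {
    let query_index = 3;
    let attribute = 1;

    // Everything except `query_index` and `attribute` comes from the default template.
    let sample = Sample { query_index, attribute, ..Sample::default() };
    println!("{:?}", sample);
}
```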