From 5809d3ae0d3c7b86d4c65ccabd689038cc3b0bc7 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 12 Apr 2022 16:31:58 +0200 Subject: [PATCH] Add first benchmarks on formatting --- benchmarks/Cargo.toml | 4 ++ benchmarks/benches/formatting.rs | 68 ++++++++++++++++++++++++++++++++ milli/src/lib.rs | 4 +- milli/src/search/matches/mod.rs | 6 +-- milli/src/search/mod.rs | 4 +- 5 files changed, 79 insertions(+), 7 deletions(-) create mode 100644 benchmarks/benches/formatting.rs diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 0cac5e017..0dbbd6d6f 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -39,3 +39,7 @@ harness = false [[bench]] name = "indexing" harness = false + +[[bench]] +name = "formatting" +harness = false diff --git a/benchmarks/benches/formatting.rs b/benchmarks/benches/formatting.rs new file mode 100644 index 000000000..5045df268 --- /dev/null +++ b/benchmarks/benches/formatting.rs @@ -0,0 +1,68 @@ +use criterion::{criterion_group, criterion_main}; +use milli::tokenizer::{Analyzer, AnalyzerConfig}; +use milli::{FormatOptions, MatcherBuilder, MatchingWord, MatchingWords}; + +#[cfg(target_os = "linux")] +#[global_allocator] +static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; + +struct Conf<'a> { + name: &'a str, + text: &'a str, + matching_words: MatcherBuilder, +} + +fn bench_formatting(c: &mut criterion::Criterion) { + #[rustfmt::skip] + let confs = &[ + Conf { + name: "'the door d'", + text: r#"He used to do the door sounds in "Star Trek" with his mouth, phssst, phssst. The MD-11 passenger and cargo doors also tend to behave like electromagnetic apertures, because the doors do not have continuous electrical contact with the door frames around the door perimeter. 
But Theodor said that the doors don't work."#, + matching_words: MatcherBuilder::from_matching_words(MatchingWords::new(vec![ + (vec![MatchingWord::new("t".to_string(), 0, false), MatchingWord::new("he".to_string(), 0, false)], vec![0]), + (vec![MatchingWord::new("the".to_string(), 0, false)], vec![0]), + (vec![MatchingWord::new("door".to_string(), 1, false)], vec![1]), + (vec![MatchingWord::new("do".to_string(), 0, false), MatchingWord::new("or".to_string(), 0, false)], vec![0]), + (vec![MatchingWord::new("thedoor".to_string(), 1, false)], vec![0, 1]), + (vec![MatchingWord::new("d".to_string(), 0, true)], vec![2]), + (vec![MatchingWord::new("thedoord".to_string(), 1, true)], vec![0, 1, 2]), + (vec![MatchingWord::new("doord".to_string(), 1, true)], vec![1, 2]), + ])), + }, + ]; + + let format_options = &[ + FormatOptions { highlight: false, crop: None }, + FormatOptions { highlight: true, crop: None }, + FormatOptions { highlight: false, crop: Some(10) }, + FormatOptions { highlight: true, crop: Some(10) }, + FormatOptions { highlight: false, crop: Some(20) }, + FormatOptions { highlight: true, crop: Some(20) }, + ]; + + for option in format_options { + let highlight = if option.highlight { "highlight" } else { "no-highlight" }; + + let name = match option.crop { + Some(size) => format!("{}-crop({})", highlight, size), + None => format!("{}-no-crop", highlight), + }; + + let mut group = c.benchmark_group(&name); + for conf in confs { + group.bench_function(conf.name, |b| { + b.iter(|| { + let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); + let analyzed = analyzer.analyze(&conf.text); + let tokens: Vec<_> = analyzed.tokens().collect(); + let mut matcher = conf.matching_words.build(&tokens[..], conf.text); + matcher.format(option.clone()); + }) + }); + } + group.finish(); + } +} + +criterion_group!(benches, bench_formatting); +criterion_main!(benches); diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 793079563..6f5d4abe8 100644 --- a/milli/src/lib.rs 
+++ b/milli/src/lib.rs @@ -37,8 +37,8 @@ pub use self::heed_codec::{ }; pub use self::index::Index; pub use self::search::{ - FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search, - SearchResult, + FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWord, + MatchingWords, Search, SearchResult, }; pub type Result<T> = std::result::Result<T, error::Error>; diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs index 65ff0a255..ad4f6cd69 100644 --- a/milli/src/search/matches/mod.rs +++ b/milli/src/search/matches/mod.rs @@ -1,11 +1,9 @@ use std::borrow::Cow; -pub use matching_words::MatchingWords; -use matching_words::{MatchType, PrimitiveWordId}; +use matching_words::{MatchType, PartialMatch, PrimitiveWordId}; +pub use matching_words::{MatchingWord, MatchingWords}; use meilisearch_tokenizer::token::{SeparatorKind, Token}; -use crate::search::matches::matching_words::PartialMatch; - pub mod matching_words; const DEFAULT_CROP_MARKER: &'static str = "…"; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index a9712d261..979b2fd7a 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -17,7 +17,9 @@ use roaring::bitmap::RoaringBitmap; pub use self::facet::{FacetDistribution, FacetNumberIter, Filter}; use self::fst_utils::{Complement, Intersection, StartsWith, Union}; -pub use self::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords}; +pub use self::matches::{ + FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords, +}; use self::query_tree::QueryTreeBuilder; use crate::error::UserError; use crate::search::criteria::r#final::{Final, FinalResult};