From 7aabe42ae045ed1d3af8742b514933ac9ab1f90d Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 2 Jun 2022 17:59:04 +0200 Subject: [PATCH 1/4] Refactor matching words --- milli/src/search/matches/mod.rs | 178 +++++++++++++++++--------------- 1 file changed, 95 insertions(+), 83 deletions(-) diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs index 85e77e15b..1a6d8958a 100644 --- a/milli/src/search/matches/mod.rs +++ b/milli/src/search/matches/mod.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; -use charabia::{SeparatorKind, Token}; +use charabia::{SeparatorKind, Token, Tokenizer}; use matching_words::{MatchType, PartialMatch, PrimitiveWordId}; pub use matching_words::{MatchingWord, MatchingWords}; use serde::Serialize; @@ -11,16 +11,23 @@ const DEFAULT_CROP_MARKER: &'static str = "…"; const DEFAULT_HIGHLIGHT_PREFIX: &'static str = ""; const DEFAULT_HIGHLIGHT_SUFFIX: &'static str = ""; -pub struct MatcherBuilder { +pub struct MatcherBuilder<'a, A> { matching_words: MatchingWords, + tokenizer: Tokenizer<'a, A>, crop_marker: Option, highlight_prefix: Option, highlight_suffix: Option, } -impl MatcherBuilder { - pub fn from_matching_words(matching_words: MatchingWords) -> Self { - Self { matching_words, crop_marker: None, highlight_prefix: None, highlight_suffix: None } +impl<'a, A> MatcherBuilder<'a, A> { + pub fn new(matching_words: MatchingWords, tokenizer: Tokenizer<'a, A>) -> Self { + Self { + matching_words, + tokenizer, + crop_marker: None, + highlight_prefix: None, + highlight_suffix: None, + } } pub fn crop_marker(&mut self, marker: String) -> &Self { @@ -38,7 +45,7 @@ impl MatcherBuilder { self } - pub fn build<'t, 'm>(&'m self, tokens: &'t [Token], text: &'t str) -> Matcher<'t, 'm> { + pub fn build<'t, 'm>(&'m self, text: &'t str) -> Matcher<'t, 'm, A> { let crop_marker = match &self.crop_marker { Some(marker) => marker.as_str(), None => &DEFAULT_CROP_MARKER, @@ -54,8 +61,8 @@ impl MatcherBuilder { }; Matcher { text, - tokens, matching_words: &self.matching_words, + tokenizer: &self.tokenizer, crop_marker, highlight_prefix, highlight_suffix, @@ -93,17 +100,17 @@ pub struct MatchBounds { pub length: usize, } -pub struct Matcher<'t, 'm> { +pub struct Matcher<'t, 'm, A> { text: &'t str, - tokens: &'t [Token<'t>], matching_words: &'m MatchingWords, + tokenizer: &'m Tokenizer<'m, A>, crop_marker: &'m str, highlight_prefix: &'m str, highlight_suffix: &'m str, - matches: Option>, + matches: Option<(Vec>, Vec)>, } -impl<'t> Matcher<'t, '_> { +impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> { /// Iterates over tokens and save any of them that matches the query. fn compute_matches(&mut self) -> &mut Self { fn compute_partial_match<'a>( @@ -159,10 +166,10 @@ impl<'t> Matcher<'t, '_> { false } + let tokens: Vec<_> = self.tokenizer.tokenize(self.text).collect(); let mut matches = Vec::new(); - let mut words_positions = self - .tokens + let mut words_positions = tokens .iter() .scan((0, 0), |(token_position, word_position), token| { let current_token_position = *token_position; @@ -210,7 +217,7 @@ impl<'t> Matcher<'t, '_> { } } - self.matches = Some(matches); + self.matches = Some((tokens, matches)); self } @@ -218,10 +225,10 @@ impl<'t> Matcher<'t, '_> { pub fn matches(&mut self) -> Vec { match &self.matches { None => self.compute_matches().matches(), - Some(matches) => matches + Some((tokens, matches)) => matches .iter() .map(|m| MatchBounds { - start: self.tokens[m.token_position].byte_start, + start: tokens[m.token_position].byte_start, length: m.match_len, }) .collect(), @@ -229,7 +236,7 @@ impl<'t> Matcher<'t, '_> { } /// Returns the bounds in byte index of the crop window. - fn crop_bounds(&self, matches: &[Match], crop_size: usize) -> (usize, usize) { + fn crop_bounds(&self, tokens: &[Token], matches: &[Match], crop_size: usize) -> (usize, usize) { // if there is no match, we start from the beginning of the string by default. let first_match_word_position = matches.first().map(|m| m.word_position).unwrap_or(0); let first_match_token_position = matches.first().map(|m| m.token_position).unwrap_or(0); @@ -239,8 +246,8 @@ impl<'t> Matcher<'t, '_> { // matches needs to be counted in the crop len. let mut remaining_words = crop_size + first_match_word_position - last_match_word_position; - let mut before_tokens = self.tokens[..first_match_token_position].iter().rev().peekable(); - let mut after_tokens = self.tokens[last_match_token_position..].iter().peekable(); + let mut before_tokens = tokens[..first_match_token_position].iter().rev().peekable(); + let mut after_tokens = tokens[last_match_token_position..].iter().peekable(); while remaining_words > 0 { let before_token = before_tokens.peek().map(|t| t.separator_kind()); @@ -396,7 +403,7 @@ impl<'t> Matcher<'t, '_> { Cow::Borrowed(self.text) } else { match &self.matches { - Some(matches) => { + Some((tokens, matches)) => { let matches = match format_options.crop { Some(crop_size) if crop_size > 0 => { self.find_best_match_interval(matches, crop_size) @@ -405,7 +412,9 @@ impl<'t> Matcher<'t, '_> { }; let (byte_start, byte_end) = match format_options.crop { - Some(crop_size) if crop_size > 0 => self.crop_bounds(matches, crop_size), + Some(crop_size) if crop_size > 0 => { + self.crop_bounds(tokens, matches, crop_size) + } _ => (0, self.text.len()), }; @@ -420,7 +429,6 @@ impl<'t> Matcher<'t, '_> { if format_options.highlight { // insert highlight markers around matches. - let tokens = self.tokens; for m in matches { let token = &tokens[m.token_position]; @@ -470,7 +478,7 @@ impl<'t> Matcher<'t, '_> { #[cfg(test)] mod tests { - use charabia::Tokenize; + use charabia::TokenizerBuilder; use super::*; use crate::search::matches::matching_words::MatchingWord; @@ -485,6 +493,12 @@ mod tests { MatchingWords::new(matching_words) } + impl MatcherBuilder<'_, Vec> { + pub fn from_matching_words(matching_words: MatchingWords) -> Self { + Self::new(matching_words, TokenizerBuilder::default().build()) + } + } + #[test] fn format_identity() { let matching_words = matching_words(); @@ -495,22 +509,22 @@ mod tests { // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // no crop and no highlight should return complete text. assert_eq!(&matcher.format(format_options), &text); // Text containing all matches. let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // no crop and no highlight should return complete text. assert_eq!(&matcher.format(format_options), &text); // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // no crop and no highlight should return complete text. assert_eq!(&matcher.format(format_options), &text); } @@ -525,34 +539,34 @@ mod tests { // empty text. let text = ""; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); assert_eq!(&matcher.format(format_options), ""); // text containing only separators. let text = ":-)"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); assert_eq!(&matcher.format(format_options), ":-)"); // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // no crop should return complete text, because there is no matches. assert_eq!(&matcher.format(format_options), &text); // Text containing all matches. let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. assert_eq!(&matcher.format(format_options), "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."); // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. assert_eq!( &matcher.format(format_options), @@ -575,22 +589,22 @@ mod tests { // Text containing prefix match. let text = "Ŵôřlḑôle"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. assert_eq!(&matcher.format(format_options), "Ŵôřlḑôle"); // Text containing unicode match. let text = "Ŵôřlḑ"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. assert_eq!(&matcher.format(format_options), "Ŵôřlḑ"); // Text containing unicode match. let text = "Westfália"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. assert_eq!(&matcher.format(format_options), "Westfália"); } @@ -605,20 +619,20 @@ mod tests { // empty text. let text = ""; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); assert_eq!(&matcher.format(format_options), ""); // text containing only separators. let text = ":-)"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); assert_eq!(&matcher.format(format_options), ":-)"); // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // no highlight should return 10 first words with a marker at the end. assert_eq!( &matcher.format(format_options), @@ -627,8 +641,8 @@ mod tests { // Text without any match starting by a separator. let text = "(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // no highlight should return 10 first words with a marker at the end. assert_eq!( &matcher.format(format_options), @@ -637,8 +651,8 @@ mod tests { // Test phrase propagation let text = "Natalie risk her future. Split The World is a book written by Emily Henry. I never read it."; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // should crop the phrase instead of croping around the match. assert_eq!( &matcher.format(format_options), @@ -647,8 +661,8 @@ mod tests { // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // no highlight should return 10 last words with a marker at the start. assert_eq!( &matcher.format(format_options), @@ -657,8 +671,8 @@ mod tests { // Text containing all matches. let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // no highlight should return 10 last words with a marker at the start. assert_eq!( &matcher.format(format_options), @@ -667,8 +681,8 @@ mod tests { // Text containing a match unordered and a match ordered. let text = "The world split void void void void void void void void void split the world void void"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // crop should return 10 last words with a marker at the start. assert_eq!( &matcher.format(format_options), @@ -677,8 +691,8 @@ mod tests { // Text containing matches with diferent density. let text = "split void the void void world void void void void void void void void void void split the world void void"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // crop should return 10 last words with a marker at the start. assert_eq!( &matcher.format(format_options), @@ -687,8 +701,8 @@ mod tests { // Text containing matches with same word. let text = "split split split split split split void void void void void void void void void void split the world void void"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // crop should return 10 last words with a marker at the start. assert_eq!( &matcher.format(format_options), @@ -706,20 +720,20 @@ mod tests { // empty text. let text = ""; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); assert_eq!(&matcher.format(format_options), ""); // text containing only separators. let text = ":-)"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); assert_eq!(&matcher.format(format_options), ":-)"); // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // both should return 10 first words with a marker at the end. assert_eq!( &matcher.format(format_options), @@ -728,8 +742,8 @@ mod tests { // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // both should return 10 last words with a marker at the start and highlighted matches. assert_eq!( &matcher.format(format_options), @@ -738,15 +752,15 @@ mod tests { // Text containing all matches. let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // both should return 10 last words with a marker at the start and highlighted matches. assert_eq!(&matcher.format(format_options), "…she loves. Emily Henry: The Love That Split The World."); // Text containing a match unordered and a match ordered. let text = "The world split void void void void void void void void void split the world void void"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + + let mut matcher = builder.build(text); // crop should return 10 last words with a marker at the start. assert_eq!( &matcher.format(format_options), @@ -762,26 +776,25 @@ mod tests { let builder = MatcherBuilder::from_matching_words(matching_words); let text = "void void split the world void void."; - let tokens: Vec<_> = text.tokenize().collect(); // set a smaller crop size let format_options = FormatOptions { highlight: false, crop: Some(2) }; - let mut matcher = builder.build(&tokens[..], text); + let mut matcher = builder.build(text); // because crop size < query size, partially format matches. assert_eq!(&matcher.format(format_options), "…split the…"); // set a smaller crop size let format_options = FormatOptions { highlight: false, crop: Some(1) }; - let mut matcher = builder.build(&tokens[..], text); + let mut matcher = builder.build(text); // because crop size < query size, partially format matches. assert_eq!(&matcher.format(format_options), "…split…"); // set crop size to 0 let format_options = FormatOptions { highlight: false, crop: Some(0) }; - let mut matcher = builder.build(&tokens[..], text); + let mut matcher = builder.build(text); // because crop size is 0, crop is ignored. assert_eq!(&matcher.format(format_options), "void void split the world void void."); } @@ -817,9 +830,8 @@ mod tests { let format_options = FormatOptions { highlight: true, crop: None }; let text = "the do or die can't be he do and or isn't he"; - let tokens: Vec<_> = text.tokenize().collect(); - let mut matcher = builder.build(&tokens[..], text); + let mut matcher = builder.build(text); assert_eq!( &matcher.format(format_options), "_the_ _do_ _or_ die can't be he _do_ and or isn'_t_ _he_", From 727d663f28680c2c8c6c2f868bf9dac87ca6de8e Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 2 Jun 2022 18:07:10 +0200 Subject: [PATCH 2/4] Update benchmarks --- benchmarks/benches/formatting.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/benches/formatting.rs b/benchmarks/benches/formatting.rs index 25e88ffeb..f0ef8ea15 100644 --- a/benchmarks/benches/formatting.rs +++ b/benchmarks/benches/formatting.rs @@ -1,5 +1,5 @@ use criterion::{criterion_group, criterion_main}; -use milli::tokenizer::Tokenize; +use milli::tokenizer::TokenizerBuilder; use milli::{FormatOptions, MatcherBuilder, MatchingWord, MatchingWords}; #[cfg(target_os = "linux")] @@ -9,7 +9,7 @@ static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; struct Conf<'a> { name: &'a str, text: &'a str, - matching_words: MatcherBuilder, + matching_words: MatcherBuilder<'a, Vec>, } fn bench_formatting(c: &mut criterion::Criterion) { @@ -18,7 +18,7 @@ fn bench_formatting(c: &mut criterion::Criterion) { Conf { name: "'the door d'", text: r#"He used to do the door sounds in "Star Trek" with his mouth, phssst, phssst. The MD-11 passenger and cargo doors also tend to behave like electromagnetic apertures, because the doors do not have continuous electrical contact with the door frames around the door perimeter. But Theodor said that the doors don't work."#, - matching_words: MatcherBuilder::from_matching_words(MatchingWords::new(vec![ + matching_words: MatcherBuilder::new(MatchingWords::new(vec![ (vec![MatchingWord::new("t".to_string(), 0, false), MatchingWord::new("he".to_string(), 0, false)], vec![0]), (vec![MatchingWord::new("the".to_string(), 0, false)], vec![0]), (vec![MatchingWord::new("door".to_string(), 1, false)], vec![1]), @@ -27,7 +27,8 @@ fn bench_formatting(c: &mut criterion::Criterion) { (vec![MatchingWord::new("d".to_string(), 0, true)], vec![2]), (vec![MatchingWord::new("thedoord".to_string(), 1, true)], vec![0, 1, 2]), (vec![MatchingWord::new("doord".to_string(), 1, true)], vec![1, 2]), - ])), + ] + ), TokenizerBuilder::default().build()), }, ]; @@ -52,8 +53,7 @@ fn bench_formatting(c: &mut criterion::Criterion) { for conf in confs { group.bench_function(conf.name, |b| { b.iter(|| { - let tokens: Vec<_> = conf.text.tokenize().collect(); - let mut matcher = conf.matching_words.build(&tokens[..], conf.text); + let mut matcher = conf.matching_words.build(conf.text); matcher.format(option.clone()); }) }); From a5c790bf4b54eeb763d9742fc1c6bc373370fb4a Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 2 Jun 2022 18:15:36 +0200 Subject: [PATCH 3/4] Update http-ui --- http-ui/src/main.rs | 43 +++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index 57a78b41e..ce4fa7ba5 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -19,7 +19,7 @@ use flate2::read::GzDecoder; use futures::{stream, FutureExt, StreamExt}; use heed::EnvOpenOptions; use milli::documents::DocumentBatchReader; -use milli::tokenizer::{Tokenizer, TokenizerBuilder}; +use milli::tokenizer::TokenizerBuilder; use milli::update::UpdateIndexingStep::*; use milli::update::{ ClearDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting, @@ -140,38 +140,31 @@ pub struct IndexerOpt { } struct Highlighter<'s, A> { - tokenizer: Tokenizer<'s, A>, + matcher_builder: MatcherBuilder<'s, A>, } impl<'s, A: AsRef<[u8]>> Highlighter<'s, A> { - fn new(stop_words: &'s fst::Set) -> Self { - let mut builder = TokenizerBuilder::new(); - builder.stop_words(stop_words); - - Self { tokenizer: builder.build() } + fn new(matcher_builder: MatcherBuilder<'s, A>) -> Self { + Self { matcher_builder } } - fn highlight_value(&self, value: Value, matcher_builder: &MatcherBuilder) -> Value { + fn highlight_value(&self, value: Value) -> Value { match value { Value::Null => Value::Null, Value::Bool(boolean) => Value::Bool(boolean), Value::Number(number) => Value::Number(number), Value::String(old_string) => { - let tokens: Vec<_> = self.tokenizer.tokenize(&old_string).collect(); - let mut matcher = matcher_builder.build(&tokens[..], &old_string); + let mut matcher = self.matcher_builder.build(&old_string); let format_options = FormatOptions { highlight: true, crop: Some(10) }; Value::String(matcher.format(format_options).to_string()) } - Value::Array(values) => Value::Array( - values.into_iter().map(|v| self.highlight_value(v, matcher_builder)).collect(), - ), + Value::Array(values) => { + Value::Array(values.into_iter().map(|v| self.highlight_value(v)).collect()) + } Value::Object(object) => Value::Object( - object - .into_iter() - .map(|(k, v)| (k, self.highlight_value(v, matcher_builder))) - .collect(), + object.into_iter().map(|(k, v)| (k, self.highlight_value(v))).collect(), ), } } @@ -179,14 +172,13 @@ impl<'s, A: AsRef<[u8]>> Highlighter<'s, A> { fn highlight_record( &self, object: &mut Map, - matcher_builder: &MatcherBuilder, attributes_to_highlight: &HashSet, ) { // TODO do we need to create a string for element that are not and needs to be highlight? for (key, value) in object.iter_mut() { if attributes_to_highlight.contains(key) { let old_value = mem::take(value); - *value = self.highlight_value(old_value, matcher_builder); + *value = self.highlight_value(old_value); } } } @@ -798,20 +790,15 @@ async fn main() -> anyhow::Result<()> { None => fields_ids_map.iter().map(|(_, name)| name).map(String::from).collect(), }; - let stop_words = fst::Set::default(); - let highlighter = Highlighter::new(&stop_words); - - let mut matcher_builder = MatcherBuilder::from_matching_words(matching_words); + let mut matcher_builder = + MatcherBuilder::new(matching_words, TokenizerBuilder::default().build()); matcher_builder.highlight_prefix("".to_string()); matcher_builder.highlight_suffix("".to_string()); + let highlighter = Highlighter::new(matcher_builder); for (_id, obkv) in index.documents(&rtxn, documents_ids).unwrap() { let mut object = obkv_to_json(&displayed_fields, &fields_ids_map, obkv).unwrap(); if !disable_highlighting { - highlighter.highlight_record( - &mut object, - &matcher_builder, - &attributes_to_highlight, - ); + highlighter.highlight_record(&mut object, &attributes_to_highlight); } documents.push(object); From d212dc6b8b797a85a045ead07717a57648cc79f0 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 2 Jun 2022 18:22:56 +0200 Subject: [PATCH 4/4] Remove useless newline --- milli/src/search/matches/mod.rs | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs index 1a6d8958a..ba2e8728e 100644 --- a/milli/src/search/matches/mod.rs +++ b/milli/src/search/matches/mod.rs @@ -509,21 +509,18 @@ mod tests { // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; - let mut matcher = builder.build(text); // no crop and no highlight should return complete text. assert_eq!(&matcher.format(format_options), &text); // Text containing all matches. let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; - let mut matcher = builder.build(text); // no crop and no highlight should return complete text. assert_eq!(&matcher.format(format_options), &text); // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; - let mut matcher = builder.build(text); // no crop and no highlight should return complete text. assert_eq!(&matcher.format(format_options), &text); @@ -539,33 +536,28 @@ mod tests { // empty text. let text = ""; - let mut matcher = builder.build(text); assert_eq!(&matcher.format(format_options), ""); // text containing only separators. let text = ":-)"; - let mut matcher = builder.build(text); assert_eq!(&matcher.format(format_options), ":-)"); // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; - let mut matcher = builder.build(text); // no crop should return complete text, because there is no matches. assert_eq!(&matcher.format(format_options), &text); // Text containing all matches. let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; - let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. assert_eq!(&matcher.format(format_options), "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."); // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; - let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. assert_eq!( @@ -589,21 +581,18 @@ mod tests { // Text containing prefix match. let text = "Ŵôřlḑôle"; - let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. assert_eq!(&matcher.format(format_options), "Ŵôřlḑôle"); // Text containing unicode match. let text = "Ŵôřlḑ"; - let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. assert_eq!(&matcher.format(format_options), "Ŵôřlḑ"); // Text containing unicode match. let text = "Westfália"; - let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. assert_eq!(&matcher.format(format_options), "Westfália"); @@ -619,19 +608,16 @@ mod tests { // empty text. let text = ""; - let mut matcher = builder.build(text); assert_eq!(&matcher.format(format_options), ""); // text containing only separators. let text = ":-)"; - let mut matcher = builder.build(text); assert_eq!(&matcher.format(format_options), ":-)"); // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; - let mut matcher = builder.build(text); // no highlight should return 10 first words with a marker at the end. assert_eq!( @@ -641,7 +627,6 @@ mod tests { // Text without any match starting by a separator. let text = "(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)"; - let mut matcher = builder.build(text); // no highlight should return 10 first words with a marker at the end. assert_eq!( @@ -651,7 +636,6 @@ mod tests { // Test phrase propagation let text = "Natalie risk her future. Split The World is a book written by Emily Henry. I never read it."; - let mut matcher = builder.build(text); // should crop the phrase instead of croping around the match. assert_eq!( @@ -661,7 +645,6 @@ mod tests { // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; - let mut matcher = builder.build(text); // no highlight should return 10 last words with a marker at the start. assert_eq!( @@ -671,7 +654,6 @@ mod tests { // Text containing all matches. let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; - let mut matcher = builder.build(text); // no highlight should return 10 last words with a marker at the start. assert_eq!( @@ -681,7 +663,6 @@ mod tests { // Text containing a match unordered and a match ordered. let text = "The world split void void void void void void void void void split the world void void"; - let mut matcher = builder.build(text); // crop should return 10 last words with a marker at the start. assert_eq!( @@ -691,7 +672,6 @@ mod tests { // Text containing matches with diferent density. let text = "split void the void void world void void void void void void void void void void split the world void void"; - let mut matcher = builder.build(text); // crop should return 10 last words with a marker at the start. assert_eq!( @@ -701,7 +681,6 @@ mod tests { // Text containing matches with same word. let text = "split split split split split split void void void void void void void void void void split the world void void"; - let mut matcher = builder.build(text); // crop should return 10 last words with a marker at the start. assert_eq!( @@ -720,19 +699,16 @@ mod tests { // empty text. let text = ""; - let mut matcher = builder.build(text); assert_eq!(&matcher.format(format_options), ""); // text containing only separators. let text = ":-)"; - let mut matcher = builder.build(text); assert_eq!(&matcher.format(format_options), ":-)"); // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; - let mut matcher = builder.build(text); // both should return 10 first words with a marker at the end. assert_eq!( @@ -742,7 +718,6 @@ mod tests { // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; - let mut matcher = builder.build(text); // both should return 10 last words with a marker at the start and highlighted matches. assert_eq!( @@ -752,14 +727,12 @@ mod tests { // Text containing all matches. let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; - let mut matcher = builder.build(text); // both should return 10 last words with a marker at the start and highlighted matches. assert_eq!(&matcher.format(format_options), "…she loves. Emily Henry: The Love That Split The World."); // Text containing a match unordered and a match ordered. let text = "The world split void void void void void void void void void split the world void void"; - let mut matcher = builder.build(text); // crop should return 10 last words with a marker at the start. assert_eq!( @@ -779,21 +752,18 @@ mod tests { // set a smaller crop size let format_options = FormatOptions { highlight: false, crop: Some(2) }; - let mut matcher = builder.build(text); // because crop size < query size, partially format matches. assert_eq!(&matcher.format(format_options), "…split the…"); // set a smaller crop size let format_options = FormatOptions { highlight: false, crop: Some(1) }; - let mut matcher = builder.build(text); // because crop size < query size, partially format matches. assert_eq!(&matcher.format(format_options), "…split…"); // set crop size to 0 let format_options = FormatOptions { highlight: false, crop: Some(0) }; - let mut matcher = builder.build(text); // because crop size is 0, crop is ignored. assert_eq!(&matcher.format(format_options), "void void split the world void void."); @@ -830,7 +800,6 @@ mod tests { let format_options = FormatOptions { highlight: true, crop: None }; let text = "the do or die can't be he do and or isn't he"; - let mut matcher = builder.build(text); assert_eq!( &matcher.format(format_options),