diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index 26c1034eb..fdfc04af9 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -25,7 +25,7 @@ use milli::update::{ ClearDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting, }; use milli::{ - obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, Index, MatchingWords, + obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, Index, MatcherBuilder, SearchResult, SortError, }; use once_cell::sync::OnceCell; @@ -152,43 +152,25 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { Self { analyzer } } - fn highlight_value(&self, value: Value, matching_words: &MatchingWords) -> Value { + fn highlight_value(&self, value: Value, matcher_builder: &MatcherBuilder) -> Value { match value { Value::Null => Value::Null, Value::Bool(boolean) => Value::Bool(boolean), Value::Number(number) => Value::Number(number), Value::String(old_string) => { - let mut string = String::new(); let analyzed = self.analyzer.analyze(&old_string); - for (word, token) in analyzed.reconstruct() { - if token.is_word() { - match matching_words.matching_bytes(&token) { - Some(chars_to_highlight) => { - let mut chars = word.chars(); + let analyzed: Vec<_> = analyzed.tokens().collect(); + let mut matcher = matcher_builder.build(&analyzed[..], &old_string); - string.push_str(""); - // push the part to highlight - string.extend(chars.by_ref().take(chars_to_highlight)); - string.push_str(""); - // push the suffix after highlight - string.extend(chars); - } - // no highlight - None => string.push_str(word), - } - } else { - string.push_str(word); - } - } - Value::String(string) + Value::String(matcher.format(true, true).to_string()) } Value::Array(values) => Value::Array( - values.into_iter().map(|v| self.highlight_value(v, matching_words)).collect(), + values.into_iter().map(|v| self.highlight_value(v, matcher_builder)).collect(), ), Value::Object(object) => Value::Object( object .into_iter() - .map(|(k, v)| (k, self.highlight_value(v, matching_words))) + .map(|(k, v)| (k, self.highlight_value(v, matcher_builder))) .collect(), ), } @@ -197,14 +179,14 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { fn highlight_record( &self, object: &mut Map, - matching_words: &MatchingWords, + matcher_builder: &MatcherBuilder, attributes_to_highlight: &HashSet, ) { // TODO do we need to create a string for element that are not and needs to be highlight? for (key, value) in object.iter_mut() { if attributes_to_highlight.contains(key) { let old_value = mem::take(value); - *value = self.highlight_value(old_value, matching_words); + *value = self.highlight_value(old_value, matcher_builder); } } } @@ -819,12 +801,15 @@ async fn main() -> anyhow::Result<()> { let stop_words = fst::Set::default(); let highlighter = Highlighter::new(&stop_words); + let mut matcher_builder = MatcherBuilder::from_matching_words(matching_words); + matcher_builder.highlight_prefix("".to_string()); + matcher_builder.highlight_suffix("".to_string()); for (_id, obkv) in index.documents(&rtxn, documents_ids).unwrap() { let mut object = obkv_to_json(&displayed_fields, &fields_ids_map, obkv).unwrap(); if !disable_highlighting { highlighter.highlight_record( &mut object, - &matching_words, + &matcher_builder, &attributes_to_highlight, ); } diff --git a/milli/src/lib.rs b/milli/src/lib.rs index ba2bd9b0f..9a9ec428c 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -36,7 +36,9 @@ pub use self::heed_codec::{ RoaringBitmapLenCodec, StrBEU32Codec, StrStrU8Codec, }; pub use self::index::Index; -pub use self::search::{FacetDistribution, Filter, MatchingWords, Search, SearchResult}; +pub use self::search::{ + FacetDistribution, Filter, MatcherBuilder, MatchingWords, Search, SearchResult, +}; pub type Result = std::result::Result; diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs index 2169c54ab..aeaa8196e 100644 --- a/milli/src/search/matches/mod.rs +++ b/milli/src/search/matches/mod.rs @@ -34,6 +34,16 @@ impl MatcherBuilder { } } + pub fn from_matching_words(matching_words: MatchingWords) -> Self { + Self { + matching_words, + crop_size: DEFAULT_CROP_SIZE, + crop_marker: None, + highlight_prefix: None, + highlight_suffix: None, + } + } + pub fn crop_size(&mut self, word_count: usize) -> &Self { self.crop_size = word_count; self