diff --git a/Cargo.lock b/Cargo.lock
index 91f11d5ae..aa7624bfe 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1141,6 +1141,7 @@ dependencies = [
  "serde_qs",
  "sha2",
  "siphasher",
+ "slice-group-by",
  "structopt",
  "sysinfo",
  "tempdir",
diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml
index 2b731f5e6..265265d57 100644
--- a/meilisearch-http/Cargo.toml
+++ b/meilisearch-http/Cargo.toml
@@ -42,6 +42,7 @@ tide = "0.6.0"
 ureq = { version = "0.12.0", features = ["tls"], default-features = false }
 walkdir = "2.3.1"
 whoami = "0.8.1"
+slice-group-by = "0.2.6"
 
 [dev-dependencies]
 http-service = "0.4.0"
diff --git a/meilisearch-http/src/helpers/meilisearch.rs b/meilisearch-http/src/helpers/meilisearch.rs
index 9dba1696d..a925562be 100644
--- a/meilisearch-http/src/helpers/meilisearch.rs
+++ b/meilisearch-http/src/helpers/meilisearch.rs
@@ -17,6 +17,7 @@ use meilisearch_tokenizer::is_cjk;
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use siphasher::sip::SipHasher;
+use slice_group_by::GroupBy;
 
 #[derive(Debug)]
 pub enum Error {
@@ -526,7 +527,12 @@ fn calculate_highlights(
             let value: Vec<_> = value.chars().collect();
             let mut highlighted_value = String::new();
             let mut index = 0;
-            for m in matches {
+
+            let longest_matches = matches
+                .linear_group_by_key(|m| m.start)
+                .map(|group| group.last().unwrap());
+
+            for m in longest_matches {
                 if m.start >= index {
                     let before = value.get(index..m.start);
                     let highlighted = value.get(m.start..(m.start + m.length));
@@ -587,6 +593,35 @@ mod tests {
 
     }
 
+    #[test]
+    fn calculate_matches() {
+        let mut matches = Vec::new();
+        matches.push(Highlight { attribute: 0, char_index: 0, char_length: 3});
+        matches.push(Highlight { attribute: 0, char_index: 0, char_length: 2});
+
+        let mut attributes_to_retrieve: HashSet<String> = HashSet::new();
+        attributes_to_retrieve.insert("title".to_string());
+
+        let schema = Schema::with_primary_key("title");
+
+        let matches_result = super::calculate_matches(matches, Some(attributes_to_retrieve), &schema);
+
+        let mut matches_result_expected: HashMap<String, Vec<MatchPosition>> = HashMap::new();
+
+        let mut positions = Vec::new();
+        positions.push(MatchPosition {
+            start: 0,
+            length: 2,
+        });
+        positions.push(MatchPosition {
+            start: 0,
+            length: 3,
+        });
+        matches_result_expected.insert("title".to_string(), positions);
+
+        assert_eq!(matches_result, matches_result_expected);
+    }
+
     #[test]
     fn calculate_highlights() {
         let data = r#"{
@@ -625,4 +660,38 @@ mod tests {
 
         assert_eq!(result, result_expected);
     }
+
+    #[test]
+    fn highlight_longest_match() {
+        let data = r#"{
+            "title": "Ice"
+        }"#;
+
+        let document: IndexMap<String, Value> = serde_json::from_str(data).unwrap();
+        let mut attributes_to_highlight = HashSet::new();
+        attributes_to_highlight.insert("title".to_string());
+
+        let mut matches = HashMap::new();
+
+        let mut m = Vec::new();
+        m.push(MatchPosition {
+            start: 0,
+            length: 2,
+        });
+        m.push(MatchPosition {
+            start: 0,
+            length: 3,
+        });
+        matches.insert("title".to_string(), m);
+
+        let result = super::calculate_highlights(&document, &matches, &attributes_to_highlight);
+
+        let mut result_expected = IndexMap::new();
+        result_expected.insert(
+            "title".to_string(),
+            Value::String("Ice".to_string()),
+        );
+
+        assert_eq!(result, result_expected);
+    }
 }