From 79046378937dbfb59d1c5280e6b8077aa0fe98ff Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Wed, 5 May 2021 17:31:40 +0200 Subject: [PATCH 01/34] crop skeleton --- meilisearch-http/src/index/search.rs | 62 ++++++++++++++++------------ 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 50d163898..21a587a37 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -232,7 +232,8 @@ fn compute_formatted>( highlighter: &Highlighter, matching_words: &impl Matcher, all_formatted: &[FieldId], - to_highlight_ids: &HashSet, + to_highlight_fields: &HashSet, + to_crop_fields: &HashSet, ) -> anyhow::Result { let mut document = Document::new(); @@ -240,8 +241,8 @@ fn compute_formatted>( if let Some(value) = obkv.get(*field) { let mut value: Value = serde_json::from_slice(value)?; - if to_highlight_ids.contains(field) { - value = highlighter.highlight_value(value, matching_words); + if to_highlight_fields.contains(field) { + value = highlighter.format_value(value, matching_words, to_highlight_fields.contains(field)); } // This unwrap must be safe since we got the ids from the fields_ids_map just @@ -291,46 +292,55 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { Self { analyzer, marks } } - fn highlight_value(&self, value: Value, words_to_highlight: &impl Matcher) -> Value { + fn format_value( + &self, + value: Value, + matcher: &impl Matcher, + need_to_crop: Option, + need_to_highlight: bool, + ) -> Value { match value { - Value::Null => Value::Null, - Value::Bool(boolean) => Value::Bool(boolean), - Value::Number(number) => Value::Number(number), Value::String(old_string) => { - let mut string = String::new(); - let analyzed = self.analyzer.analyze(&old_string); - for (word, token) in analyzed.reconstruct() { - if token.is_word() { - let to_highlight = words_to_highlight.matches(token.text()); - if to_highlight { - string.push_str(&self.marks.0) - } - string.push_str(word); - if to_highlight { - string.push_str(&self.marks.1) - } - } else { - string.push_str(word); - } - } - Value::String(string) + let value = self.format_string(old_string, need_to_crop, need_to_highlight); + Value::String(value) } Value::Array(values) => Value::Array( values .into_iter() - .map(|v| self.highlight_value(v, words_to_highlight)) + .map(|v| self.format_value(v, matcher, None, need_to_highlight)) .collect(), ), Value::Object(object) => Value::Object( object .into_iter() - .map(|(k, v)| (k, self.highlight_value(v, words_to_highlight))) + .map(|(k, v)| (k, self.format_value(value, matcher, None, need_to_highlight))) .collect(), ), + value => value, } } + fn format_string(&self, s: String, need_to_crop: Option, need_to_highlight: bool) -> String { + let word_iter: Box> = if let Some(_crop_len) = need_to_crop { + // cropping iterator + todo!() + } else { + // normal Iterator + todo!() + }; + + word_iter.map(|(word, is_match)| { + if need_to_highlight && is_match { + // highlight word + todo!() + } else { + word + } + }) + .collect::() + } } + fn parse_facets( facets: &Value, index: &Index, From a03d9d496e2cd85c22e74811b4103053e850cb65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Thu, 6 May 2021 16:32:11 +0200 Subject: [PATCH 02/34] Fix compilation errors --- meilisearch-http/src/index/search.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 21a587a37..262535888 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -168,6 +168,7 @@ impl Index { &matching_words, all_formatted.as_ref().as_slice(), &to_highlight_ids, + &to_crop_ids, )?; let hit = SearchHit { document, @@ -241,8 +242,14 @@ fn compute_formatted>( if let Some(value) = obkv.get(*field) { let mut value: Value = serde_json::from_slice(value)?; + let need_to_crop = if to_crop_fields.contains(field) { + Some(200) // TO CHANGE + } else { + None + }; + if to_highlight_fields.contains(field) { - value = highlighter.format_value(value, matching_words, to_highlight_fields.contains(field)); + value = highlighter.format_value(value, matching_words, need_to_crop, to_highlight_fields.contains(field)); } // This unwrap must be safe since we got the ids from the fields_ids_map just @@ -313,7 +320,7 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { Value::Object(object) => Value::Object( object .into_iter() - .map(|(k, v)| (k, self.format_value(value, matcher, None, need_to_highlight))) + .map(|(k, v)| (k, self.format_value(v, matcher, None, need_to_highlight))) .collect(), ), value => value, From 60f6d1c373c1c4319bc982b3061f98f6cea67097 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Thu, 6 May 2021 18:31:41 +0200 Subject: [PATCH 03/34] First version of highlight after refacto --- meilisearch-http/src/index/search.rs | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 262535888..f9b1508f9 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -308,7 +308,7 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { ) -> Value { match value { Value::String(old_string) => { - let value = self.format_string(old_string, need_to_crop, need_to_highlight); + let value = self.format_string(old_string, matcher, need_to_crop, need_to_highlight); Value::String(value) } Value::Array(values) => Value::Array( @@ -326,19 +326,28 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { value => value, } } - fn format_string(&self, s: String, need_to_crop: Option, need_to_highlight: bool) -> String { + fn format_string(&self, s: String, matcher: &impl Matcher, need_to_crop: Option, need_to_highlight: bool) -> String { + let analyzed = self.analyzer.analyze(&s); let word_iter: Box> = if let Some(_crop_len) = need_to_crop { // cropping iterator todo!() } else { - // normal Iterator - todo!() + Box::new(analyzed.reconstruct().map(|(word, token)| { + if token.is_word() && matcher.matches(token.text()){ + (word.to_string(), true) + } else { + (word.to_string(), false) + } + })) }; word_iter.map(|(word, is_match)| { if need_to_highlight && is_match { - // highlight word - todo!() + let mut new_word = String::new(); + new_word.push_str(&self.marks.0); + new_word.push_str(&word); + new_word.push_str(&self.marks.1); + new_word } else { word } From 93002e734cebabfb63c9f66b16e4dac1a3ea6a3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Thu, 6 May 2021 18:41:04 +0200 Subject: [PATCH 04/34] Fix tests --- meilisearch-http/src/index/search.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index f9b1508f9..df553d6ef 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -423,6 +423,7 @@ mod test { let all_formatted = Vec::new(); let to_highlight_ids = HashSet::new(); + let to_crop_ids = HashSet::new(); let matching_words = MatchingWords::default(); @@ -433,8 +434,8 @@ mod test { &matching_words, &all_formatted, &to_highlight_ids, - ) - .unwrap(); + &to_crop_ids, + ).unwrap(); assert!(value.is_empty()); } @@ -458,6 +459,7 @@ mod test { let all_formatted = vec![id]; let to_highlight_ids = HashSet::new(); + let to_crop_ids = HashSet::new(); let matching_words = MatchingWords::default(); @@ -468,8 +470,8 @@ mod test { &matching_words, &all_formatted, &to_highlight_ids, - ) - .unwrap(); + &to_crop_ids, + ).unwrap(); assert_eq!(value["test"], "hello"); } @@ -493,6 +495,7 @@ mod test { let all_formatted = vec![id]; let to_highlight_ids = HashSet::from_iter(Some(id)); + let to_crop_ids = HashSet::new(); let matching_words = HashSet::from_iter(Some(String::from("hello"))); @@ -503,8 +506,8 @@ mod test { &matching_words, &all_formatted, &to_highlight_ids, - ) - .unwrap(); + &to_crop_ids, + ).unwrap(); assert_eq!(value["test"], "hello"); } From 56c9633c53c4369b5804569ef824cd977fb1cc12 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Tue, 11 May 2021 17:27:31 +0200 Subject: [PATCH 05/34] simple crop before --- meilisearch-http/src/index/search.rs | 38 +++++++++++++++------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index df553d6ef..ce9338d5f 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -7,7 +7,7 @@ use either::Either; use heed::RoTxn; use indexmap::IndexMap; use itertools::Itertools; -use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; +use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token}; use milli::{FilterCondition, FieldId, FieldsIdsMap, MatchingWords}; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -303,7 +303,7 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { &self, value: Value, matcher: &impl Matcher, - need_to_crop: Option, + need_to_crop: Option, need_to_highlight: bool, ) -> Value { match value { @@ -326,30 +326,34 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { value => value, } } - fn format_string(&self, s: String, matcher: &impl Matcher, need_to_crop: Option, need_to_highlight: bool) -> String { + fn format_string(&self, s: String, matcher: &impl Matcher, need_to_crop: Option, need_to_highlight: bool) -> String { let analyzed = self.analyzer.analyze(&s); - let word_iter: Box> = if let Some(_crop_len) = need_to_crop { - // cropping iterator - todo!() - } else { - Box::new(analyzed.reconstruct().map(|(word, token)| { - if token.is_word() && matcher.matches(token.text()){ - (word.to_string(), true) - } else { - (word.to_string(), false) - } - })) + + let tokens: Box> = match need_to_crop { + Some(crop_len) => { + let mut taken = 0; + let iter = analyzed + .reconstruct() + .skip_while(|(_, token)| !matcher.matches(token.text())) + .take_while(move |(word, _)| { + let take = taken < crop_len; + taken += word.chars().count(); + take + }); + Box::new(iter) + }, + None => Box::new(analyzed.reconstruct()), }; - word_iter.map(|(word, is_match)| { - if need_to_highlight && is_match { + tokens.map(|(word, token)| { + if need_to_highlight && token.is_word() && matcher.matches(token.text()){ let mut new_word = String::new(); new_word.push_str(&self.marks.0); new_word.push_str(&word); new_word.push_str(&self.marks.1); new_word } else { - word + word.to_string() } }) .collect::() From 7473cc6e27fe2658ee48f90afe4656042a6826ac Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Tue, 11 May 2021 18:30:55 +0200 Subject: [PATCH 06/34] implement crop around --- meilisearch-http/src/index/search.rs | 126 +++++++++++++++++---------- 1 file changed, 79 insertions(+), 47 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index ce9338d5f..760357b9a 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -1,6 +1,6 @@ -use std::borrow::Cow; -use std::collections::{BTreeMap, HashSet}; +use std::collections::{BTreeMap, HashSet, VecDeque}; use std::time::Instant; +use std::{borrow::Cow, collections::HashMap}; use anyhow::bail; use either::Either; @@ -157,7 +157,12 @@ impl Index { let stop_words = fst::Set::default(); let highlighter = - Highlighter::new(&stop_words, (String::from(""), String::from(""))); + Formatter::new(&stop_words, (String::from(""), String::from(""))); + + let to_crop = to_crop_ids + .into_iter() + .map(|id| (id, query.crop_length)) + .collect::>(); for (_id, obkv) in self.documents(&rtxn, documents_ids)? { let document = make_document(&all_attributes, &fields_ids_map, obkv)?; @@ -168,7 +173,7 @@ impl Index { &matching_words, all_formatted.as_ref().as_slice(), &to_highlight_ids, - &to_crop_ids, + &to_crop, )?; let hit = SearchHit { document, @@ -230,11 +235,11 @@ fn make_document( fn compute_formatted>( field_ids_map: &FieldsIdsMap, obkv: obkv::KvReader, - highlighter: &Highlighter, + highlighter: &Formatter, matching_words: &impl Matcher, all_formatted: &[FieldId], to_highlight_fields: &HashSet, - to_crop_fields: &HashSet, + to_crop_fields: &HashMap>, ) -> anyhow::Result { let mut document = Document::new(); @@ -242,15 +247,12 @@ fn compute_formatted>( if let Some(value) = obkv.get(*field) { let mut value: Value = serde_json::from_slice(value)?; - let need_to_crop = if to_crop_fields.contains(field) { - Some(200) // TO CHANGE - } else { - None - }; - - if to_highlight_fields.contains(field) { - value = highlighter.format_value(value, matching_words, need_to_crop, to_highlight_fields.contains(field)); - } + value = highlighter.format_value( + value, + matching_words, + to_crop_fields.get(field).copied().flatten(), + to_highlight_fields.contains(field), + ); // This unwrap must be safe since we got the ids from the fields_ids_map just // before. @@ -284,12 +286,12 @@ impl Matcher for MatchingWords { } } -struct Highlighter<'a, A> { +struct Formatter<'a, A> { analyzer: Analyzer<'a, A>, marks: (String, String), } -impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { +impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { pub fn new(stop_words: &'a fst::Set, marks: (String, String)) -> Self { let mut config = AnalyzerConfig::default(); config.stop_words(stop_words); @@ -305,10 +307,11 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { matcher: &impl Matcher, need_to_crop: Option, need_to_highlight: bool, - ) -> Value { + ) -> Value { match value { Value::String(old_string) => { - let value = self.format_string(old_string, matcher, need_to_crop, need_to_highlight); + let value = + self.format_string(old_string, matcher, need_to_crop, need_to_highlight); Value::String(value) } Value::Array(values) => Value::Array( @@ -326,41 +329,67 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { value => value, } } - fn format_string(&self, s: String, matcher: &impl Matcher, need_to_crop: Option, need_to_highlight: bool) -> String { + fn format_string( + &self, + s: String, + matcher: &impl Matcher, + need_to_crop: Option, + need_to_highlight: bool, + ) -> String { let analyzed = self.analyzer.analyze(&s); - let tokens: Box> = match need_to_crop { + let tokens: Box> = match need_to_crop { Some(crop_len) => { - let mut taken = 0; - let iter = analyzed - .reconstruct() - .skip_while(|(_, token)| !matcher.matches(token.text())) + let mut buffer = VecDeque::new(); + let mut tokens = analyzed.reconstruct().peekable(); + let mut taken_before = 0; + while let Some((word, token)) = tokens.next_if(|(_, token)| !matcher.matches(token.text())) { + buffer.push_back((word, token)); + taken_before += word.chars().count(); + while taken_before > crop_len { + if let Some((word, _)) = buffer.pop_front() { + taken_before -= word.chars().count(); + } + } + } + + if let Some(token) = tokens.next() { + buffer.push_back(token); + } + + let mut taken_after = 0; + + let after_iter = tokens .take_while(move |(word, _)| { - let take = taken < crop_len; - taken += word.chars().count(); + let take = taken_after <= crop_len; + taken_after += word.chars().count(); take }); + let iter = buffer + .into_iter() + .chain(after_iter); + Box::new(iter) - }, + } None => Box::new(analyzed.reconstruct()), }; - tokens.map(|(word, token)| { - if need_to_highlight && token.is_word() && matcher.matches(token.text()){ - let mut new_word = String::new(); - new_word.push_str(&self.marks.0); - new_word.push_str(&word); - new_word.push_str(&self.marks.1); - new_word - } else { - word.to_string() - } - }) - .collect::() + tokens + .map(|(word, token)| { + if need_to_highlight && token.is_word() && matcher.matches(token.text()) { + let mut new_word = String::new(); + new_word.push_str(&self.marks.0); + new_word.push_str(&word); + new_word.push_str(&self.marks.1); + new_word + } else { + word.to_string() + } + }) + .collect::() } } - fn parse_facets( facets: &Value, index: &Index, @@ -412,7 +441,7 @@ mod test { fn no_formatted() { let stop_words = fst::Set::default(); let highlighter = - Highlighter::new(&stop_words, (String::from(""), String::from(""))); + Formatter::new(&stop_words, (String::from(""), String::from(""))); let mut fields = FieldsIdsMap::new(); let id = fields.insert("test").unwrap(); @@ -439,7 +468,8 @@ mod test { &all_formatted, &to_highlight_ids, &to_crop_ids, - ).unwrap(); + ) + .unwrap(); assert!(value.is_empty()); } @@ -448,7 +478,7 @@ mod test { fn formatted_no_highlight() { let stop_words = fst::Set::default(); let highlighter = - Highlighter::new(&stop_words, (String::from(""), String::from(""))); + Formatter::new(&stop_words, (String::from(""), String::from(""))); let mut fields = FieldsIdsMap::new(); let id = fields.insert("test").unwrap(); @@ -475,7 +505,8 @@ mod test { &all_formatted, &to_highlight_ids, &to_crop_ids, - ).unwrap(); + ) + .unwrap(); assert_eq!(value["test"], "hello"); } @@ -484,7 +515,7 @@ mod test { fn formatted_with_highlight() { let stop_words = fst::Set::default(); let highlighter = - Highlighter::new(&stop_words, (String::from(""), String::from(""))); + Formatter::new(&stop_words, (String::from(""), String::from(""))); let mut fields = FieldsIdsMap::new(); let id = fields.insert("test").unwrap(); @@ -511,7 +542,8 @@ mod test { &all_formatted, &to_highlight_ids, &to_crop_ids, - ).unwrap(); + ) + .unwrap(); assert_eq!(value["test"], "hello"); } From caaf8d3f4077317d497bd777379a3441b00ddc5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Thu, 3 Jun 2021 17:54:53 +0200 Subject: [PATCH 07/34] Fix tests --- meilisearch-http/src/index/search.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 760357b9a..eb6f98d87 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -456,7 +456,7 @@ mod test { let all_formatted = Vec::new(); let to_highlight_ids = HashSet::new(); - let to_crop_ids = HashSet::new(); + let to_crop_ids = HashMap::new(); let matching_words = MatchingWords::default(); @@ -493,7 +493,7 @@ mod test { let all_formatted = vec![id]; let to_highlight_ids = HashSet::new(); - let to_crop_ids = HashSet::new(); + let to_crop_ids = HashMap::new(); let matching_words = MatchingWords::default(); @@ -530,7 +530,7 @@ mod test { let all_formatted = vec![id]; let to_highlight_ids = HashSet::from_iter(Some(id)); - let to_crop_ids = HashSet::new(); + let to_crop_ids = HashMap::new(); let matching_words = HashSet::from_iter(Some(String::from("hello"))); From 811bc2f421567cfe7b8cfc2a2fb5e92f770ae580 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Fri, 4 Jun 2021 02:25:38 +0200 Subject: [PATCH 08/34] Around to previous word --- meilisearch-http/src/index/search.rs | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index eb6f98d87..429b5582a 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -16,8 +16,12 @@ use super::Index; pub type Document = IndexMap; -pub const DEFAULT_SEARCH_LIMIT: usize = 20; +// pub const DEFAULT_CROP_LENGTH: usize = 5; +// const fn default_crop_length() -> Option { +// Some(DEFAULT_CROP_LENGTH) +// } +pub const DEFAULT_SEARCH_LIMIT: usize = 20; const fn default_search_limit() -> usize { DEFAULT_SEARCH_LIMIT } @@ -31,6 +35,7 @@ pub struct SearchQuery { pub limit: usize, pub attributes_to_retrieve: Option>, pub attributes_to_crop: Option>, + // #[serde(default = "default_crop_length")] pub crop_length: Option, pub attributes_to_highlight: Option>, pub matches: Option, @@ -162,6 +167,7 @@ impl Index { let to_crop = to_crop_ids .into_iter() .map(|id| (id, query.crop_length)) + // .map(|id| (id, Some(5))) .collect::>(); for (_id, obkv) in self.documents(&rtxn, documents_ids)? { @@ -346,7 +352,13 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { while let Some((word, token)) = tokens.next_if(|(_, token)| !matcher.matches(token.text())) { buffer.push_back((word, token)); taken_before += word.chars().count(); - while taken_before > crop_len { + while taken_before >= crop_len { + // Around to the previous word + if let Some((word, _)) = buffer.front() { + if taken_before - word.chars().count() < crop_len { + break; + } + } if let Some((word, _)) = buffer.pop_front() { taken_before -= word.chars().count(); } @@ -358,13 +370,13 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { } let mut taken_after = 0; - let after_iter = tokens .take_while(move |(word, _)| { - let take = taken_after <= crop_len; + let take = taken_after < crop_len; taken_after += word.chars().count(); take }); + let iter = buffer .into_iter() .chain(after_iter); From 0da8fa115e90302e3ccbd62dccd6a94b2bfc8272 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Tue, 8 Jun 2021 17:33:20 +0200 Subject: [PATCH 09/34] Add custom croplength for attributes to crop --- meilisearch-http/src/index/search.rs | 61 +++++++++++++++++++-------- meilisearch-http/src/routes/search.rs | 2 +- 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 429b5582a..603ebf1b3 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -16,16 +16,16 @@ use super::Index; pub type Document = IndexMap; -// pub const DEFAULT_CROP_LENGTH: usize = 5; -// const fn default_crop_length() -> Option { -// Some(DEFAULT_CROP_LENGTH) -// } - pub const DEFAULT_SEARCH_LIMIT: usize = 20; const fn default_search_limit() -> usize { DEFAULT_SEARCH_LIMIT } +pub const DEFAULT_CROP_LENGTH: usize = 200; +const fn default_crop_length() -> Option { + Some(DEFAULT_CROP_LENGTH) +} + #[derive(Deserialize)] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct SearchQuery { @@ -34,8 +34,8 @@ pub struct SearchQuery { #[serde(default = "default_search_limit")] pub limit: usize, pub attributes_to_retrieve: Option>, - pub attributes_to_crop: Option>, - // #[serde(default = "default_crop_length")] + pub attributes_to_crop: Option>, + #[serde(default = "default_crop_length")] pub crop_length: Option, pub attributes_to_highlight: Option>, pub matches: Option, @@ -126,12 +126,45 @@ impl Index { .map(fids) .unwrap_or_default(); - let to_crop_ids = query + let to_crop_ids_length = query .attributes_to_crop .as_ref() - .map(fids) + .map(|attributes: &Vec| { + let mut ids_length_crop = HashMap::new(); + for attribute in attributes { + let mut attr_name = attribute.clone(); + let mut attr_len = query.crop_length; + + if attr_name.contains(":") { + let mut split = attr_name.rsplit(':'); + attr_len = match split.nth(0) { + Some(s) => s.parse::().ok(), + None => None, + }; + attr_name = split.flat_map(|s| s.chars()).collect(); + } + + if attr_name == "*" { + let ids = displayed_ids.clone(); + for id in ids { + ids_length_crop.insert(id, attr_len); + } + } + + if let Some(id) = fields_ids_map.id(&attr_name) { + ids_length_crop.insert(id, attr_len); + } + } + ids_length_crop + }) .unwrap_or_default(); + let to_crop_ids = to_crop_ids_length + .clone() + .into_iter() + .map(|(k, _)| k) + .collect::>(); + // The attributes to retrieve are: // - the ones explicitly marked as to retrieve that are also in the displayed attributes let all_attributes: Vec<_> = to_retrieve_ids @@ -164,12 +197,6 @@ impl Index { let highlighter = Formatter::new(&stop_words, (String::from(""), String::from(""))); - let to_crop = to_crop_ids - .into_iter() - .map(|id| (id, query.crop_length)) - // .map(|id| (id, Some(5))) - .collect::>(); - for (_id, obkv) in self.documents(&rtxn, documents_ids)? { let document = make_document(&all_attributes, &fields_ids_map, obkv)?; let formatted = compute_formatted( @@ -179,7 +206,7 @@ impl Index { &matching_words, all_formatted.as_ref().as_slice(), &to_highlight_ids, - &to_crop, + &to_crop_ids_length, )?; let hit = SearchHit { document, @@ -352,7 +379,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { while let Some((word, token)) = tokens.next_if(|(_, token)| !matcher.matches(token.text())) { buffer.push_back((word, token)); taken_before += word.chars().count(); - while taken_before >= crop_len { + while taken_before > crop_len { // Around to the previous word if let Some((word, _)) = buffer.front() { if taken_before - word.chars().count() < crop_len { diff --git a/meilisearch-http/src/routes/search.rs b/meilisearch-http/src/routes/search.rs index be06960cf..8489215e7 100644 --- a/meilisearch-http/src/routes/search.rs +++ b/meilisearch-http/src/routes/search.rs @@ -40,7 +40,7 @@ impl TryFrom for SearchQuery { let attributes_to_crop = other .attributes_to_crop - .map(|attrs| attrs.split(',').map(String::from).collect::>()); + .map(|attrs| attrs.split(',').map(String::from).collect::>()); let attributes_to_highlight = other .attributes_to_highlight From 9e69f33f3c3b9792cc85314bc786440fd1a16bdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Tue, 8 Jun 2021 18:02:04 +0200 Subject: [PATCH 10/34] Fix clippy errors --- meilisearch-http/src/index/search.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 603ebf1b3..bedde0997 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -135,9 +135,9 @@ impl Index { let mut attr_name = attribute.clone(); let mut attr_len = query.crop_length; - if attr_name.contains(":") { + if attr_name.contains(':') { let mut split = attr_name.rsplit(':'); - attr_len = match split.nth(0) { + attr_len = match split.next() { Some(s) => s.parse::().ok(), None => None, }; From 4f8c771bb567ed96b14758e8d836911efa4b528a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Sun, 13 Jun 2021 11:53:29 +0200 Subject: [PATCH 11/34] Add new line --- meilisearch-http/src/index/search.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index bedde0997..ae476318e 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -362,6 +362,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { value => value, } } + fn format_string( &self, s: String, From 7f84f59472dcab518e215e97904d3144a9d9b9d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Sun, 13 Jun 2021 12:00:38 +0200 Subject: [PATCH 12/34] Reorganize imports --- meilisearch-http/src/index/search.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index ae476318e..47a9bd4fb 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -1,6 +1,6 @@ -use std::collections::{BTreeMap, HashSet, VecDeque}; +use std::borrow::Cow; +use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; use std::time::Instant; -use std::{borrow::Cow, collections::HashMap}; use anyhow::bail; use either::Either; From 638009fb2b31d9970b28ce642f1a78a8b2f658b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Sun, 13 Jun 2021 12:29:24 +0200 Subject: [PATCH 13/34] Rename highlighter variable into formatter --- meilisearch-http/src/index/search.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 47a9bd4fb..acabcf46d 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -194,7 +194,7 @@ impl Index { }; let stop_words = fst::Set::default(); - let highlighter = + let formatter = Formatter::new(&stop_words, (String::from(""), String::from(""))); for (_id, obkv) in self.documents(&rtxn, documents_ids)? { @@ -202,7 +202,7 @@ impl Index { let formatted = compute_formatted( &fields_ids_map, obkv, - &highlighter, + &formatter, &matching_words, all_formatted.as_ref().as_slice(), &to_highlight_ids, @@ -268,7 +268,7 @@ fn make_document( fn compute_formatted>( field_ids_map: &FieldsIdsMap, obkv: obkv::KvReader, - highlighter: &Formatter, + formatter: &Formatter, matching_words: &impl Matcher, all_formatted: &[FieldId], to_highlight_fields: &HashSet, @@ -280,7 +280,7 @@ fn compute_formatted>( if let Some(value) = obkv.get(*field) { let mut value: Value = serde_json::from_slice(value)?; - value = highlighter.format_value( + value = formatter.format_value( value, matching_words, to_crop_fields.get(field).copied().flatten(), @@ -301,7 +301,7 @@ fn compute_formatted>( Ok(document) } -/// trait to allow unit testing of `compute_formated` +/// trait to allow unit testing of `compute_formatted` trait Matcher { fn matches(&self, w: &str) -> bool; } @@ -480,7 +480,7 @@ mod test { #[test] fn no_formatted() { let stop_words = fst::Set::default(); - let highlighter = + let formatter = Formatter::new(&stop_words, (String::from(""), String::from(""))); let mut fields = FieldsIdsMap::new(); @@ -503,7 +503,7 @@ mod test { let value = compute_formatted( &fields, obkv, - &highlighter, + &formatter, &matching_words, &all_formatted, &to_highlight_ids, @@ -517,7 +517,7 @@ mod test { #[test] fn formatted_no_highlight() { let stop_words = fst::Set::default(); - let highlighter = + let formatter = Formatter::new(&stop_words, (String::from(""), String::from(""))); let mut fields = FieldsIdsMap::new(); @@ -540,7 +540,7 @@ mod test { let value = compute_formatted( &fields, obkv, - &highlighter, + &formatter, &matching_words, &all_formatted, &to_highlight_ids, @@ -554,7 +554,7 @@ mod test { #[test] fn formatted_with_highlight() { let stop_words = fst::Set::default(); - let highlighter = + let formatter = Formatter::new(&stop_words, (String::from(""), String::from(""))); let mut fields = FieldsIdsMap::new(); @@ -577,7 +577,7 @@ mod test { let value = compute_formatted( &fields, obkv, - &highlighter, + &formatter, &matching_words, &all_formatted, &to_highlight_ids, From 65130d9ee7aa380dc42a404032bf50aa79eb80ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Sun, 13 Jun 2021 12:37:38 +0200 Subject: [PATCH 14/34] Change crop_length type from Option(usize) to usize --- meilisearch-http/src/index/search.rs | 8 ++++---- meilisearch-http/src/routes/search.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index acabcf46d..7e93b7099 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -22,8 +22,8 @@ const fn default_search_limit() -> usize { } pub const DEFAULT_CROP_LENGTH: usize = 200; -const fn default_crop_length() -> Option { - Some(DEFAULT_CROP_LENGTH) +const fn default_crop_length() -> usize { + DEFAULT_CROP_LENGTH } #[derive(Deserialize)] @@ -36,7 +36,7 @@ pub struct SearchQuery { pub attributes_to_retrieve: Option>, pub attributes_to_crop: Option>, #[serde(default = "default_crop_length")] - pub crop_length: Option, + pub crop_length: usize, pub attributes_to_highlight: Option>, pub matches: Option, pub filter: Option, @@ -133,7 +133,7 @@ impl Index { let mut ids_length_crop = HashMap::new(); for attribute in attributes { let mut attr_name = attribute.clone(); - let mut attr_len = query.crop_length; + let mut attr_len = Some(query.crop_length); if attr_name.contains(':') { let mut split = attr_name.rsplit(':'); diff --git a/meilisearch-http/src/routes/search.rs b/meilisearch-http/src/routes/search.rs index 8489215e7..c2c83e3c8 100644 --- a/meilisearch-http/src/routes/search.rs +++ b/meilisearch-http/src/routes/search.rs @@ -23,7 +23,7 @@ pub struct SearchQueryGet { limit: Option, attributes_to_retrieve: Option, attributes_to_crop: Option, - crop_length: Option, + crop_length: usize, attributes_to_highlight: Option, filter: Option, matches: Option, From d0ec081e4900171ecf103c21a45b49bd7bdf2505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Sun, 13 Jun 2021 23:51:33 +0200 Subject: [PATCH 15/34] Refacto --- meilisearch-http/src/index/search.rs | 492 +++++++++++++++++++++------ 1 file changed, 380 insertions(+), 112 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 7e93b7099..b2c606c02 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -1,4 +1,4 @@ -use std::borrow::Cow; +// use std::borrow::Cow; use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; use std::time::Instant; @@ -65,6 +65,12 @@ pub struct SearchResult { pub facet_distributions: Option>>, } +#[derive(Copy, Clone)] +struct FormatOptions { + highlight: bool, + crop: Option, +} + impl Index { pub fn perform_search(&self, query: SearchQuery) -> anyhow::Result { let before_search = Instant::now(); @@ -108,7 +114,9 @@ impl Index { } if let Some(id) = fields_ids_map.id(attr) { - ids.insert(id); + if displayed_ids.contains(&id) { + ids.insert(id); + } } } ids @@ -120,51 +128,6 @@ impl Index { .map(fids) .unwrap_or_else(|| displayed_ids.clone()); - let to_highlight_ids = query - .attributes_to_highlight - .as_ref() - .map(fids) - .unwrap_or_default(); - - let to_crop_ids_length = query - .attributes_to_crop - .as_ref() - .map(|attributes: &Vec| { - let mut ids_length_crop = HashMap::new(); - for attribute in attributes { - let mut attr_name = attribute.clone(); - let mut attr_len = Some(query.crop_length); - - if attr_name.contains(':') { - let mut split = attr_name.rsplit(':'); - attr_len = match split.next() { - Some(s) => s.parse::().ok(), - None => None, - }; - attr_name = split.flat_map(|s| s.chars()).collect(); - } - - if attr_name == "*" { - let ids = displayed_ids.clone(); - for id in ids { - ids_length_crop.insert(id, attr_len); - } - } - - if let Some(id) = fields_ids_map.id(&attr_name) { - ids_length_crop.insert(id, attr_len); - } - } - ids_length_crop - }) - .unwrap_or_default(); - - let to_crop_ids = to_crop_ids_length - .clone() - .into_iter() - .map(|(k, _)| k) - .collect::>(); - // The attributes to retrieve are: // - the ones explicitly marked as to retrieve that are also in the displayed attributes let all_attributes: Vec<_> = to_retrieve_ids @@ -173,25 +136,152 @@ impl Index { .sorted() .collect(); + let mut formatted_options = HashMap::new(); + + let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default(); + for attr in attr_to_highlight { + let new_format = FormatOptions { + highlight: true, + crop: None, + }; + + if attr == "*" { + let ids = displayed_ids.clone(); + for id in ids { + formatted_options.insert(id, new_format); + } + break; + } + + if let Some(id) = fields_ids_map.id(&attr) { + if displayed_ids.contains(&id) { + formatted_options.insert(id, new_format); + } + } + }; + + let attr_to_crop = query.attributes_to_crop.unwrap_or_default(); + for attr in attr_to_crop { + let mut attr_name = attr.clone(); + let mut attr_len = Some(query.crop_length); + + if attr_name.contains(':') { + let mut split = attr_name.rsplit(':'); + attr_len = match split.next() { + Some(s) => s.parse::().ok(), + None => None, + }; + attr_name = split.flat_map(|s| s.chars()).collect(); + } + + if attr_name == "*" { + let ids = displayed_ids.clone(); + for id in ids { + let mut highlight = false; + if let Some(f) = formatted_options.get(&id) { + highlight = f.highlight; + } + formatted_options.insert(id, FormatOptions { + highlight: highlight, + crop: attr_len, + }); + } + } + + if let Some(id) = fields_ids_map.id(&attr_name) { + if displayed_ids.contains(&id) { + let mut highlight = false; + if let Some(f) = formatted_options.get(&id) { + highlight = f.highlight; + } + formatted_options.insert(id, FormatOptions { + highlight: highlight, + crop: attr_len, + }); + } + } + } + + let formatted_ids = formatted_options + .keys() + .cloned() + .collect::>(); + + // All attributes present in `_formatted` that are not necessary highighted or croped + let ids_in_formatted = formatted_ids + .union(&to_retrieve_ids) + .cloned() + .sorted() + .collect::>(); + + + // let to_highlight_ids = query // PLUS BESOIN + // .attributes_to_highlight + // .as_ref() + // .map(fids) + // .unwrap_or_default(); + + + // let to_crop_ids_length = query + // .attributes_to_crop + // .as_ref() + // .map(|attributes: &Vec| { + // let mut ids_length_crop = HashMap::new(); + // for attribute in attributes { + // let mut attr_name = attribute.clone(); + // let mut attr_len = Some(query.crop_length); + + // if attr_name.contains(':') { + // let mut split = attr_name.rsplit(':'); + // attr_len = match split.next() { + // Some(s) => s.parse::().ok(), + // None => None, + // }; + // attr_name = split.flat_map(|s| s.chars()).collect(); + // } + + // if attr_name == "*" { + // let ids = displayed_ids.clone(); + // for id in ids { + // ids_length_crop.insert(id, attr_len); + // } + // } + + // if let Some(id) = fields_ids_map.id(&attr_name) { + // if displayed_ids.contains(&id) { + // ids_length_crop.insert(id, attr_len); + // } + // } + // } + // ids_length_crop + // }) + // .unwrap_or_default(); + + // let to_crop_ids = to_crop_ids_length // PLUS BESOIN + // .clone() + // .into_iter() + // .map(|(k, _)| k) + // .collect::>(); + // The formatted attributes are: // - The one in either highlighted attributes or cropped attributes if there are attributes // to retrieve // - All the attributes to retrieve if there are either highlighted or cropped attributes // the request specified that all attributes are to retrieve (i.e attributes to retrieve is // empty in the query) - let all_formatted = if query.attributes_to_retrieve.is_none() { - if query.attributes_to_highlight.is_some() || query.attributes_to_crop.is_some() { - Cow::Borrowed(&all_attributes) - } else { - Cow::Owned(Vec::new()) - } - } else { - let attrs = (&to_crop_ids | &to_highlight_ids) - .intersection(&displayed_ids) - .cloned() - .collect::>(); - Cow::Owned(attrs) - }; + // let all_formatted = if query.attributes_to_retrieve.is_none() { + // if query.attributes_to_highlight.is_some() || query.attributes_to_crop.is_some() { + // Cow::Borrowed(&all_attributes) + // } else { + // Cow::Owned(Vec::new()) + // } + // } else { + // let attrs = (&to_crop_ids | &to_highlight_ids) + // .intersection(&displayed_ids) + // .cloned() + // .collect::>(); + // Cow::Owned(attrs) + // }; let stop_words = fst::Set::default(); let formatter = @@ -204,9 +294,11 @@ impl Index { obkv, &formatter, &matching_words, - all_formatted.as_ref().as_slice(), - &to_highlight_ids, - &to_crop_ids_length, + &ids_in_formatted, + // all_formatted.as_ref().as_slice(), + &formatted_options, + // &to_highlight_ids, //ICI + // &to_crop_ids_length, //ICI )?; let hit = SearchHit { document, @@ -270,31 +362,38 @@ fn compute_formatted>( obkv: obkv::KvReader, formatter: &Formatter, matching_words: &impl Matcher, - all_formatted: &[FieldId], - to_highlight_fields: &HashSet, - to_crop_fields: &HashMap>, + ids_in_formatted: &Vec, + formatted_options: &HashMap, + // to_highlight_fields: &HashSet, //ICI + // to_crop_fields: &HashMap>, //ICI ) -> anyhow::Result { let mut document = Document::new(); - for field in all_formatted { - if let Some(value) = obkv.get(*field) { - let mut value: Value = serde_json::from_slice(value)?; + if formatted_options.len() > 0 { + for field in ids_in_formatted { + if let Some(value) = obkv.get(*field) { + let mut value: Value = serde_json::from_slice(value)?; - value = formatter.format_value( - value, - matching_words, - to_crop_fields.get(field).copied().flatten(), - to_highlight_fields.contains(field), - ); + if let Some(format) = formatted_options.get(field) { + value = formatter.format_value( + value, + matching_words, + format.highlight, + format.crop, + // to_crop_fields.get(field).copied().flatten(), //ICI + // to_highlight_fields.contains(field), //ICI + ); + } - // This unwrap must be safe since we got the ids from the fields_ids_map just - // before. - let key = field_ids_map - .name(*field) - .expect("Missing field name") - .to_string(); + // This unwrap must be safe since we got the ids from the fields_ids_map just + // before. + let key = field_ids_map + .name(*field) + .expect("Missing field name") + .to_string(); - document.insert(key, value); + document.insert(key, value); + } } } @@ -338,25 +437,25 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { &self, value: Value, matcher: &impl Matcher, - need_to_crop: Option, need_to_highlight: bool, + need_to_crop: Option, ) -> Value { match value { Value::String(old_string) => { let value = - self.format_string(old_string, matcher, need_to_crop, need_to_highlight); + self.format_string(old_string, matcher, need_to_highlight, need_to_crop); Value::String(value) } Value::Array(values) => Value::Array( values .into_iter() - .map(|v| self.format_value(v, matcher, None, need_to_highlight)) + .map(|v| self.format_value(v, matcher, need_to_highlight, None)) .collect(), ), Value::Object(object) => Value::Object( object .into_iter() - .map(|(k, v)| (k, self.format_value(v, matcher, None, need_to_highlight))) + .map(|(k, v)| (k, self.format_value(v, matcher, need_to_highlight, None))) .collect(), ), value => value, @@ -367,8 +466,8 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { &self, s: String, matcher: &impl Matcher, - need_to_crop: Option, need_to_highlight: bool, + need_to_crop: Option, ) -> String { let analyzed = self.analyzer.analyze(&s); @@ -478,7 +577,7 @@ mod test { use super::*; #[test] - fn no_formatted() { + fn no_ids_no_formatted() { let stop_words = fst::Set::default(); let formatter = Formatter::new(&stop_words, (String::from(""), String::from(""))); @@ -494,9 +593,8 @@ mod test { let obkv = obkv::KvReader::new(&buf); - let all_formatted = Vec::new(); - let to_highlight_ids = HashSet::new(); - let to_crop_ids = HashMap::new(); + let ids_in_formatted = Vec::new(); + let formatted_options = HashMap::new(); let matching_words = MatchingWords::default(); @@ -505,9 +603,8 @@ mod test { obkv, &formatter, &matching_words, - &all_formatted, - &to_highlight_ids, - &to_crop_ids, + &ids_in_formatted, + &formatted_options, ) .unwrap(); @@ -515,7 +612,7 @@ mod test { } #[test] - fn formatted_no_highlight() { + fn no_formatted_with_ids() { let stop_words = fst::Set::default(); let formatter = Formatter::new(&stop_words, (String::from(""), String::from(""))); @@ -531,9 +628,8 @@ mod test { let obkv = obkv::KvReader::new(&buf); - let all_formatted = vec![id]; - let to_highlight_ids = HashSet::new(); - let to_crop_ids = HashMap::new(); + let ids_in_formatted = vec![id]; + let formatted_options = HashMap::new(); let matching_words = MatchingWords::default(); @@ -542,13 +638,12 @@ mod test { obkv, &formatter, &matching_words, - &all_formatted, - &to_highlight_ids, - &to_crop_ids, + &ids_in_formatted, + &formatted_options, ) .unwrap(); - assert_eq!(value["test"], "hello"); + assert!(value.is_empty()); } #[test] @@ -558,33 +653,206 @@ mod test { Formatter::new(&stop_words, (String::from(""), String::from(""))); let mut fields = FieldsIdsMap::new(); - let id = fields.insert("test").unwrap(); + let title = fields.insert("title").unwrap(); + let author = fields.insert("author").unwrap(); let mut buf = Vec::new(); let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert(id, Value::String("hello".into()).to_string().as_bytes()) + obkv.insert(title, Value::String("The Hobbit".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(author, Value::String("J. R. R. Tolkien".into()).to_string().as_bytes()) .unwrap(); obkv.finish().unwrap(); let obkv = obkv::KvReader::new(&buf); - let all_formatted = vec![id]; - let to_highlight_ids = HashSet::from_iter(Some(id)); - let to_crop_ids = HashMap::new(); + let ids_in_formatted = vec![title, author]; + let mut formatted_options = HashMap::new(); + formatted_options.insert(title, FormatOptions { highlight: true, crop: None }); - let matching_words = HashSet::from_iter(Some(String::from("hello"))); + let matching_words = HashSet::from_iter(Some(String::from("hobbit"))); let value = compute_formatted( &fields, obkv, &formatter, &matching_words, - &all_formatted, - &to_highlight_ids, - &to_crop_ids, + &ids_in_formatted, + &formatted_options, ) .unwrap(); - assert_eq!(value["test"], "hello"); + assert_eq!(value["title"], "The Hobbit"); + assert_eq!(value["author"], "J. R. R. Tolkien"); + } + + #[test] + fn formatted_with_crop_2() { + let stop_words = fst::Set::default(); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); + + let mut fields = FieldsIdsMap::new(); + let title = fields.insert("title").unwrap(); + let author = fields.insert("author").unwrap(); + + let mut buf = Vec::new(); + let mut obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(title, Value::String("Harry Potter and the Half-Blood Prince".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(author, Value::String("J. K. Rowling".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + + let obkv = obkv::KvReader::new(&buf); + + let ids_in_formatted = vec![title, author]; + let mut formatted_options = HashMap::new(); + formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(2) }); + + let matching_words = HashSet::from_iter(Some(String::from("potter"))); + + let value = compute_formatted( + &fields, + obkv, + &formatter, + &matching_words, + &ids_in_formatted, + &formatted_options, + ) + .unwrap(); + + assert_eq!(value["title"], "Harry Potter and"); + assert_eq!(value["author"], "J. K. Rowling"); + } + + #[test] + fn formatted_with_crop_10() { + let stop_words = fst::Set::default(); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); + + let mut fields = FieldsIdsMap::new(); + let title = fields.insert("title").unwrap(); + let author = fields.insert("author").unwrap(); + + let mut buf = Vec::new(); + let mut obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(title, Value::String("Harry Potter and the Half-Blood Prince".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(author, Value::String("J. K. Rowling".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + + let obkv = obkv::KvReader::new(&buf); + + let ids_in_formatted = vec![title, author]; + let mut formatted_options = HashMap::new(); + formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(10) }); + + let matching_words = HashSet::from_iter(Some(String::from("potter"))); + + let value = compute_formatted( + &fields, + obkv, + &formatter, + &matching_words, + &ids_in_formatted, + &formatted_options, + ) + .unwrap(); + + assert_eq!(value["title"], "Harry Potter and the Half"); + assert_eq!(value["author"], "J. K. Rowling"); + } + + #[test] + fn formatted_with_crop_0() { + let stop_words = fst::Set::default(); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); + + let mut fields = FieldsIdsMap::new(); + let title = fields.insert("title").unwrap(); + let author = fields.insert("author").unwrap(); + + let mut buf = Vec::new(); + let mut obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(title, Value::String("Harry Potter and the Half-Blood Prince".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(author, Value::String("J. K. Rowling".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + + let obkv = obkv::KvReader::new(&buf); + + let ids_in_formatted = vec![title, author]; + let mut formatted_options = HashMap::new(); + formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(0) }); + + let matching_words = HashSet::from_iter(Some(String::from("potter"))); + + let value = compute_formatted( + &fields, + obkv, + &formatter, + &matching_words, + &ids_in_formatted, + &formatted_options, + ) + .unwrap(); + + assert_eq!(value["title"], "Potter"); + assert_eq!(value["author"], "J. K. Rowling"); + } + + #[test] + fn formatted_with_crop_and_highlight() { + let stop_words = fst::Set::default(); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); + + let mut fields = FieldsIdsMap::new(); + let title = fields.insert("title").unwrap(); + let author = fields.insert("author").unwrap(); + + let mut buf = Vec::new(); + let mut obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(title, Value::String("Harry Potter and the Half-Blood Prince".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(author, Value::String("J. K. Rowling".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + + let obkv = obkv::KvReader::new(&buf); + + let ids_in_formatted = vec![title, author]; + let mut formatted_options = HashMap::new(); + formatted_options.insert(title, FormatOptions { highlight: true, crop: Some(1) }); + + let matching_words = HashSet::from_iter(Some(String::from("and"))); + + let value = compute_formatted( + &fields, + obkv, + &formatter, + &matching_words, + &ids_in_formatted, + &formatted_options, + ) + .unwrap(); + + assert_eq!(value["title"], " and "); + assert_eq!(value["author"], "J. K. Rowling"); } } From 446b66b0fed92769606dc841da4f22921466a3cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Mon, 14 Jun 2021 14:59:38 +0200 Subject: [PATCH 16/34] Fix cargo clippy error --- meilisearch-http/src/index/search.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index b2c606c02..56fa5425f 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -182,7 +182,7 @@ impl Index { highlight = f.highlight; } formatted_options.insert(id, FormatOptions { - highlight: highlight, + highlight, crop: attr_len, }); } @@ -195,7 +195,7 @@ impl Index { highlight = f.highlight; } formatted_options.insert(id, FormatOptions { - highlight: highlight, + highlight, crop: attr_len, }); } @@ -362,14 +362,14 @@ fn compute_formatted>( obkv: obkv::KvReader, formatter: &Formatter, matching_words: &impl Matcher, - ids_in_formatted: &Vec, + ids_in_formatted: &[FieldId], formatted_options: &HashMap, // to_highlight_fields: &HashSet, //ICI // to_crop_fields: &HashMap>, //ICI ) -> anyhow::Result { let mut document = Document::new(); - if formatted_options.len() > 0 { + if !formatted_options.is_empty() { for field in ids_in_formatted { if let Some(value) = obkv.get(*field) { let mut value: Value = serde_json::from_slice(value)?; From b7698771833882dadd050813d4c1cbe8810112b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Mon, 14 Jun 2021 18:26:47 +0200 Subject: [PATCH 17/34] Make it compatible with the new milli highlighting --- meilisearch-http/src/index/search.rs | 131 ++++++++++++++++++++++++--- 1 file changed, 116 insertions(+), 15 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 56fa5425f..1d834234b 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -402,19 +402,25 @@ fn compute_formatted>( /// trait to allow unit testing of `compute_formatted` trait Matcher { - fn matches(&self, w: &str) -> bool; + fn matches(&self, w: &str) -> Option; } +// #[cfg(test)] +// impl Matcher for HashSet { +// fn matches(&self, w: &str) -> bool { +// self.contains(w) +// } +// } #[cfg(test)] -impl Matcher for HashSet { - fn matches(&self, w: &str) -> bool { - self.contains(w) +impl Matcher for HashMap<&str, Option> { + fn matches(&self, w: &str) -> Option { + self.get(w).cloned().flatten() } } impl Matcher for MatchingWords { - fn matches(&self, w: &str) -> bool { - self.matching_bytes(w).is_some() + fn matches(&self, w: &str) -> Option { + self.matching_bytes(w) } } @@ -476,7 +482,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { let mut buffer = VecDeque::new(); let mut tokens = analyzed.reconstruct().peekable(); let mut taken_before = 0; - while let Some((word, token)) = tokens.next_if(|(_, token)| !matcher.matches(token.text())) { + while let Some((word, token)) = tokens.next_if(|(_, token)| !matcher.matches(token.text()).is_some()) { buffer.push_back((word, token)); taken_before += word.chars().count(); while taken_before > crop_len { @@ -515,11 +521,14 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { tokens .map(|(word, token)| { - if need_to_highlight && token.is_word() && matcher.matches(token.text()) { + if need_to_highlight && token.is_word() && matcher.matches(token.text()).is_some() { let mut new_word = String::new(); new_word.push_str(&self.marks.0); - new_word.push_str(&word); - new_word.push_str(&self.marks.1); + if let Some(match_len) = matcher.matches(token.text()) { + new_word.push_str(&word[..match_len]); + new_word.push_str(&self.marks.1); + new_word.push_str(&word[match_len..]); + } new_word } else { word.to_string() @@ -672,7 +681,9 @@ mod test { let mut formatted_options = HashMap::new(); formatted_options.insert(title, FormatOptions { highlight: true, crop: None }); - let matching_words = HashSet::from_iter(Some(String::from("hobbit"))); + // let matching_words = HashSet::from_iter(Some(String::from("hobbit"))); + let mut matching_words = HashMap::new(); + matching_words.insert("hobbit", Some(6)); let value = compute_formatted( &fields, @@ -688,6 +699,49 @@ mod test { assert_eq!(value["author"], "J. R. R. Tolkien"); } + #[test] + fn formatted_with_highlight_in_word() { + let stop_words = fst::Set::default(); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); + + let mut fields = FieldsIdsMap::new(); + let title = fields.insert("title").unwrap(); + let author = fields.insert("author").unwrap(); + + let mut buf = Vec::new(); + let mut obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(title, Value::String("The Hobbit".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(author, Value::String("J. R. R. Tolkien".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + + let obkv = obkv::KvReader::new(&buf); + + let ids_in_formatted = vec![title, author]; + let mut formatted_options = HashMap::new(); + formatted_options.insert(title, FormatOptions { highlight: true, crop: None }); + + let mut matching_words = HashMap::new(); + matching_words.insert("hobbit", Some(3)); + + let value = compute_formatted( + &fields, + obkv, + &formatter, + &matching_words, + &ids_in_formatted, + &formatted_options, + ) + .unwrap(); + + assert_eq!(value["title"], "The Hobbit"); + assert_eq!(value["author"], "J. R. R. Tolkien"); + } + #[test] fn formatted_with_crop_2() { let stop_words = fst::Set::default(); @@ -714,7 +768,8 @@ mod test { let mut formatted_options = HashMap::new(); formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(2) }); - let matching_words = HashSet::from_iter(Some(String::from("potter"))); + let mut matching_words = HashMap::new(); + matching_words.insert("potter", Some(6)); let value = compute_formatted( &fields, @@ -756,7 +811,8 @@ mod test { let mut formatted_options = HashMap::new(); formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(10) }); - let matching_words = HashSet::from_iter(Some(String::from("potter"))); + let mut matching_words = HashMap::new(); + matching_words.insert("potter", Some(6)); let value = compute_formatted( &fields, @@ -798,7 +854,8 @@ mod test { let mut formatted_options = HashMap::new(); formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(0) }); - let matching_words = HashSet::from_iter(Some(String::from("potter"))); + let mut matching_words = HashMap::new(); + matching_words.insert("potter", Some(6)); let value = compute_formatted( &fields, @@ -840,7 +897,8 @@ mod test { let mut formatted_options = HashMap::new(); formatted_options.insert(title, FormatOptions { highlight: true, crop: Some(1) }); - let matching_words = HashSet::from_iter(Some(String::from("and"))); + let mut matching_words = HashMap::new(); + matching_words.insert("and", Some(3)); let value = compute_formatted( &fields, @@ -855,4 +913,47 @@ mod test { assert_eq!(value["title"], " and "); assert_eq!(value["author"], "J. K. Rowling"); } + + #[test] + fn formatted_with_crop_and_highlight_in_word() { + let stop_words = fst::Set::default(); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); + + let mut fields = FieldsIdsMap::new(); + let title = fields.insert("title").unwrap(); + let author = fields.insert("author").unwrap(); + + let mut buf = Vec::new(); + let mut obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(title, Value::String("Harry Potter and the Half-Blood Prince".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(author, Value::String("J. K. Rowling".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + + let obkv = obkv::KvReader::new(&buf); + + let ids_in_formatted = vec![title, author]; + let mut formatted_options = HashMap::new(); + formatted_options.insert(title, FormatOptions { highlight: true, crop: Some(9) }); + + let mut matching_words = HashMap::new(); + matching_words.insert("blood", Some(3)); + + let value = compute_formatted( + &fields, + obkv, + &formatter, + &matching_words, + &ids_in_formatted, + &formatted_options, + ) + .unwrap(); + + assert_eq!(value["title"], "the Half-Blood Prince"); + assert_eq!(value["author"], "J. K. Rowling"); + } } From 5dffe566fd59a5cdc6d40df9f8dc0b5328cc2a97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Mon, 14 Jun 2021 18:40:15 +0200 Subject: [PATCH 18/34] Remove useless comments --- meilisearch-http/src/index/search.rs | 86 +--------------------------- 1 file changed, 1 insertion(+), 85 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 1d834234b..37aea6248 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -1,4 +1,3 @@ -// use std::borrow::Cow; use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; use std::time::Instant; @@ -207,82 +206,13 @@ impl Index { .cloned() .collect::>(); - // All attributes present in `_formatted` that are not necessary highighted or croped + // All attributes present in `_formatted` that are not necessary highighted or cropped let ids_in_formatted = formatted_ids .union(&to_retrieve_ids) .cloned() .sorted() .collect::>(); - - // let to_highlight_ids = query // PLUS BESOIN - // .attributes_to_highlight - // .as_ref() - // .map(fids) - // .unwrap_or_default(); - - - // let to_crop_ids_length = query - // .attributes_to_crop - // .as_ref() - // .map(|attributes: &Vec| { - // let mut ids_length_crop = HashMap::new(); - // for attribute in attributes { - // let mut attr_name = attribute.clone(); - // let mut attr_len = Some(query.crop_length); - - // if attr_name.contains(':') { - // let mut split = attr_name.rsplit(':'); - // attr_len = match split.next() { - // Some(s) => s.parse::().ok(), - // None => None, - // }; - // attr_name = split.flat_map(|s| s.chars()).collect(); - // } - - // if attr_name == "*" { - // let ids = displayed_ids.clone(); - // for id in ids { - // ids_length_crop.insert(id, attr_len); - // } - // } - - // if let Some(id) = fields_ids_map.id(&attr_name) { - // if displayed_ids.contains(&id) { - // ids_length_crop.insert(id, attr_len); - // } - // } - // } - // ids_length_crop - // }) - // .unwrap_or_default(); - - // let to_crop_ids = to_crop_ids_length // PLUS BESOIN - // .clone() - // .into_iter() - // .map(|(k, _)| k) - // .collect::>(); - - // The formatted attributes are: - // - The one in either highlighted attributes or cropped attributes if there are attributes - // to retrieve - // - All the attributes to retrieve if there are either highlighted or cropped attributes - // the request specified that all attributes are to retrieve (i.e attributes to retrieve is - // empty in the query) - // let all_formatted = if query.attributes_to_retrieve.is_none() { - // if query.attributes_to_highlight.is_some() || query.attributes_to_crop.is_some() { - // Cow::Borrowed(&all_attributes) - // } else { - // Cow::Owned(Vec::new()) - // } - // } else { - // let attrs = (&to_crop_ids | &to_highlight_ids) - // .intersection(&displayed_ids) - // .cloned() - // .collect::>(); - // Cow::Owned(attrs) - // }; - let stop_words = fst::Set::default(); let formatter = Formatter::new(&stop_words, (String::from(""), String::from(""))); @@ -295,10 +225,7 @@ impl Index { &formatter, &matching_words, &ids_in_formatted, - // all_formatted.as_ref().as_slice(), &formatted_options, - // &to_highlight_ids, //ICI - // &to_crop_ids_length, //ICI )?; let hit = SearchHit { document, @@ -364,8 +291,6 @@ fn compute_formatted>( matching_words: &impl Matcher, ids_in_formatted: &[FieldId], formatted_options: &HashMap, - // to_highlight_fields: &HashSet, //ICI - // to_crop_fields: &HashMap>, //ICI ) -> anyhow::Result { let mut document = Document::new(); @@ -380,8 +305,6 @@ fn compute_formatted>( matching_words, format.highlight, format.crop, - // to_crop_fields.get(field).copied().flatten(), //ICI - // to_highlight_fields.contains(field), //ICI ); } @@ -405,12 +328,6 @@ trait Matcher { fn matches(&self, w: &str) -> Option; } -// #[cfg(test)] -// impl Matcher for HashSet { -// fn matches(&self, w: &str) -> bool { -// self.contains(w) -// } -// } #[cfg(test)] impl Matcher for HashMap<&str, Option> { fn matches(&self, w: &str) -> Option { @@ -681,7 +598,6 @@ mod test { let mut formatted_options = HashMap::new(); formatted_options.insert(title, FormatOptions { highlight: true, crop: None }); - // let matching_words = HashSet::from_iter(Some(String::from("hobbit"))); let mut matching_words = HashMap::new(); matching_words.insert("hobbit", Some(6)); From d9c0190497ec2f6320991c9010e6bead07c274dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Tue, 15 Jun 2021 16:21:41 +0200 Subject: [PATCH 19/34] Redo to_retrieve_ids --- meilisearch-http/src/index/search.rs | 45 ++++++++++++++++------------ 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 37aea6248..5e4ee6e6f 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -113,31 +113,37 @@ impl Index { } if let Some(id) = fields_ids_map.id(attr) { - if displayed_ids.contains(&id) { - ids.insert(id); - } + ids.insert(id); } } ids }; - let to_retrieve_ids = query + // The attributes to retrieve are the ones explicitly marked as to retrieve (all by default), + // but these attributes must be also + // - present in the fields_ids_map + // - present in the the displayed attributes + let to_retrieve_ids: HashSet<_> = query .attributes_to_retrieve .as_ref() .map(fids) - .unwrap_or_else(|| displayed_ids.clone()); - - // The attributes to retrieve are: - // - the ones explicitly marked as to retrieve that are also in the displayed attributes - let all_attributes: Vec<_> = to_retrieve_ids + .unwrap_or_else(|| displayed_ids.clone()) .intersection(&displayed_ids) .cloned() + .collect(); + + let to_retrieve_ids_sorted: Vec<_> = to_retrieve_ids + .clone() + .into_iter() .sorted() .collect(); let mut formatted_options = HashMap::new(); - let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default(); + let attr_to_highlight = query + .attributes_to_highlight + .unwrap_or_default(); + for attr in attr_to_highlight { let new_format = FormatOptions { highlight: true, @@ -159,7 +165,10 @@ impl Index { } }; - let attr_to_crop = query.attributes_to_crop.unwrap_or_default(); + let attr_to_crop = query + .attributes_to_crop + .unwrap_or_default(); + for attr in attr_to_crop { let mut attr_name = attr.clone(); let mut attr_len = Some(query.crop_length); @@ -201,13 +210,13 @@ impl Index { } } - let formatted_ids = formatted_options + // All attributes present in `_formatted`: + // - attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`) + // - attributes asked to be retrieved: these attributes will not be formatted + let ids_in_formatted = formatted_options .keys() .cloned() - .collect::>(); - - // All attributes present in `_formatted` that are not necessary highighted or cropped - let ids_in_formatted = formatted_ids + .collect::>() .union(&to_retrieve_ids) .cloned() .sorted() @@ -218,7 +227,7 @@ impl Index { Formatter::new(&stop_words, (String::from(""), String::from(""))); for (_id, obkv) in self.documents(&rtxn, documents_ids)? { - let document = make_document(&all_attributes, &fields_ids_map, obkv)?; + let document = make_document(&to_retrieve_ids_sorted, &fields_ids_map, obkv)?; let formatted = compute_formatted( &fields_ids_map, obkv, @@ -498,8 +507,6 @@ fn parse_facets_array( #[cfg(test)] mod test { - use std::iter::FromIterator; - use super::*; #[test] From 5e656bb58a452d42a3148d530c7ca8dee580344b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Tue, 15 Jun 2021 16:25:16 +0200 Subject: [PATCH 20/34] Rename parse_facets into parse_filter --- meilisearch-http/src/index/search.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 5e4ee6e6f..4b178815f 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -85,7 +85,7 @@ impl Index { search.offset(query.offset.unwrap_or_default()); if let Some(ref filter) = query.filter { - if let Some(facets) = parse_facets(filter, self, &rtxn)? { + if let Some(facets) = parse_filter(filter, self, &rtxn)? { search.filter(facets); } } @@ -464,19 +464,19 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { } } -fn parse_facets( +fn parse_filter( facets: &Value, index: &Index, txn: &RoTxn, ) -> anyhow::Result> { match facets { Value::String(expr) => Ok(Some(FilterCondition::from_str(txn, index, expr)?)), - Value::Array(arr) => parse_facets_array(txn, index, arr), + Value::Array(arr) => parse_filter_array(txn, index, arr), v => bail!("Invalid facet expression, expected Array, found: {:?}", v), } } -fn parse_facets_array( +fn parse_filter_array( txn: &RoTxn, index: &Index, arr: &[Value], From 8d0269fcc4386a0022ea50954d4d1172fb4c562b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Tue, 15 Jun 2021 17:16:07 +0200 Subject: [PATCH 21/34] Create function to create fomatted_options --- meilisearch-http/src/index/search.rs | 146 +++++++++++++++------------ 1 file changed, 83 insertions(+), 63 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 4b178815f..e4c7777a0 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -138,81 +138,26 @@ impl Index { .sorted() .collect(); - let mut formatted_options = HashMap::new(); - let attr_to_highlight = query .attributes_to_highlight .unwrap_or_default(); - for attr in attr_to_highlight { - let new_format = FormatOptions { - highlight: true, - crop: None, - }; - - if attr == "*" { - let ids = displayed_ids.clone(); - for id in ids { - formatted_options.insert(id, new_format); - } - break; - } - - if let Some(id) = fields_ids_map.id(&attr) { - if displayed_ids.contains(&id) { - formatted_options.insert(id, new_format); - } - } - }; - let attr_to_crop = query .attributes_to_crop .unwrap_or_default(); - for attr in attr_to_crop { - let mut attr_name = attr.clone(); - let mut attr_len = Some(query.crop_length); - - if attr_name.contains(':') { - let mut split = attr_name.rsplit(':'); - attr_len = match split.next() { - Some(s) => s.parse::().ok(), - None => None, - }; - attr_name = split.flat_map(|s| s.chars()).collect(); - } - - if attr_name == "*" { - let ids = displayed_ids.clone(); - for id in ids { - let mut highlight = false; - if let Some(f) = formatted_options.get(&id) { - highlight = f.highlight; - } - formatted_options.insert(id, FormatOptions { - highlight, - crop: attr_len, - }); - } - } - - if let Some(id) = fields_ids_map.id(&attr_name) { - if displayed_ids.contains(&id) { - let mut highlight = false; - if let Some(f) = formatted_options.get(&id) { - highlight = f.highlight; - } - formatted_options.insert(id, FormatOptions { - highlight, - crop: attr_len, - }); - } - } - } + let formatted_options = parse_formatted_options( + &attr_to_highlight, + &attr_to_crop, + query.crop_length, + &fields_ids_map, + &displayed_ids, + ); // All attributes present in `_formatted`: // - attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`) // - attributes asked to be retrieved: these attributes will not be formatted + // - attributes that are present in displayed attributes let ids_in_formatted = formatted_options .keys() .cloned() @@ -270,6 +215,81 @@ impl Index { } } +fn parse_formatted_options( + attr_to_highlight: &HashSet, + attr_to_crop: &[String], + query_crop_length: usize, + fields_ids_map: &FieldsIdsMap, + displayed_ids: &HashSet, + ) -> HashMap { + + let mut formatted_options = HashMap::new(); + + for attr in attr_to_highlight { + let new_format = FormatOptions { + highlight: true, + crop: None, + }; + + if attr == "*" { + let ids = displayed_ids.clone(); + for id in ids { + formatted_options.insert(id, new_format); + } + break; + } + + if let Some(id) = fields_ids_map.id(&attr) { + if displayed_ids.contains(&id) { + formatted_options.insert(id, new_format); + } + } + }; + + for attr in attr_to_crop { + let mut attr_name = attr.clone(); + let mut attr_len = Some(query_crop_length); + + if attr_name.contains(':') { + let mut split = attr_name.rsplit(':'); + attr_len = match split.next() { + Some(s) => s.parse::().ok(), + None => None, + }; + attr_name = split.flat_map(|s| s.chars()).collect(); + } + + if attr_name == "*" { + let ids = displayed_ids.clone(); + for id in ids { + let mut highlight = false; + if let Some(f) = formatted_options.get(&id) { + highlight = f.highlight; + } + formatted_options.insert(id, FormatOptions { + highlight, + crop: attr_len, + }); + } + } + + if let Some(id) = fields_ids_map.id(&attr_name) { + if displayed_ids.contains(&id) { + let mut highlight = false; + if let Some(f) = formatted_options.get(&id) { + highlight = f.highlight; + } + formatted_options.insert(id, FormatOptions { + highlight, + crop: attr_len, + }); + } + } + } + + formatted_options +} + fn make_document( attributes_to_retrieve: &[FieldId], field_ids_map: &FieldsIdsMap, From 79a1212ebea46403389cfc4b0e632a919f5161a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Tue, 15 Jun 2021 17:28:26 +0200 Subject: [PATCH 22/34] Do intersection with displayed ids instead of checking in loop --- meilisearch-http/src/index/search.rs | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index e4c7777a0..c1275d089 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -162,6 +162,9 @@ impl Index { .keys() .cloned() .collect::>() + .intersection(&displayed_ids) + .cloned() + .collect::>() .union(&to_retrieve_ids) .cloned() .sorted() @@ -240,9 +243,7 @@ fn parse_formatted_options( } if let Some(id) = fields_ids_map.id(&attr) { - if displayed_ids.contains(&id) { - formatted_options.insert(id, new_format); - } + formatted_options.insert(id, new_format); } }; @@ -274,16 +275,14 @@ fn parse_formatted_options( } if let Some(id) = fields_ids_map.id(&attr_name) { - if displayed_ids.contains(&id) { - let mut highlight = false; - if let Some(f) = formatted_options.get(&id) { - highlight = f.highlight; - } - formatted_options.insert(id, FormatOptions { - highlight, - crop: attr_len, - }); + let mut highlight = false; + if let Some(f) = formatted_options.get(&id) { + highlight = f.highlight; } + formatted_options.insert(id, FormatOptions { + highlight, + crop: attr_len, + }); } } From 1ef061d92bfe53c1a69ad2ba589430b0a937434c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Tue, 15 Jun 2021 17:31:32 +0200 Subject: [PATCH 23/34] Fix clippy errors --- meilisearch-http/src/index/search.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index c1275d089..cfb2f77d1 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -154,10 +154,10 @@ impl Index { &displayed_ids, ); - // All attributes present in `_formatted`: - // - attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`) - // - attributes asked to be retrieved: these attributes will not be formatted - // - attributes that are present in displayed attributes + // All attributes present in `_formatted` are: + // - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`) + // - the attributes asked to be retrieved: these attributes will not be highlighted/cropped + // But these attributes must be present in displayed attributes let ids_in_formatted = formatted_options .keys() .cloned() @@ -427,7 +427,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { let mut buffer = VecDeque::new(); let mut tokens = analyzed.reconstruct().peekable(); let mut taken_before = 0; - while let Some((word, token)) = tokens.next_if(|(_, token)| !matcher.matches(token.text()).is_some()) { + while let Some((word, token)) = tokens.next_if(|(_, token)| matcher.matches(token.text()).is_none()) { buffer.push_back((word, token)); taken_before += word.chars().count(); while taken_before > crop_len { From 9840b5c7fb59a5afa773bc696aa711fb50818cee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Tue, 15 Jun 2021 18:44:56 +0200 Subject: [PATCH 24/34] Refacto --- meilisearch-http/src/index/search.rs | 58 +++++++++++++--------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index cfb2f77d1..c2e57af75 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -235,9 +235,8 @@ fn parse_formatted_options( }; if attr == "*" { - let ids = displayed_ids.clone(); - for id in ids { - formatted_options.insert(id, new_format); + for id in displayed_ids { + formatted_options.insert(*id, new_format); } break; } @@ -245,44 +244,41 @@ fn parse_formatted_options( if let Some(id) = fields_ids_map.id(&attr) { formatted_options.insert(id, new_format); } - }; + } for attr in attr_to_crop { let mut attr_name = attr.clone(); - let mut attr_len = Some(query_crop_length); + let mut attr_len = query_crop_length; - if attr_name.contains(':') { - let mut split = attr_name.rsplit(':'); - attr_len = match split.next() { - Some(s) => s.parse::().ok(), - None => None, - }; - attr_name = split.flat_map(|s| s.chars()).collect(); - } + let mut split = attr_name.rsplitn(2, ':'); + attr_name = match split.next().zip(split.next()) { + Some((len, name)) => { + attr_len = len.parse().unwrap_or(query_crop_length); + name.to_string() + }, + None => attr_name, + }; if attr_name == "*" { - let ids = displayed_ids.clone(); - for id in ids { - let mut highlight = false; - if let Some(f) = formatted_options.get(&id) { - highlight = f.highlight; - } - formatted_options.insert(id, FormatOptions { - highlight, - crop: attr_len, - }); + for id in displayed_ids { + formatted_options + .entry(*id) + .and_modify(|f| f.crop = Some(attr_len)) + .or_insert(FormatOptions { + highlight: false, + crop: Some(attr_len), + }); } } if let Some(id) = fields_ids_map.id(&attr_name) { - let mut highlight = false; - if let Some(f) = formatted_options.get(&id) { - highlight = f.highlight; - } - formatted_options.insert(id, FormatOptions { - highlight, - crop: attr_len, - }); + formatted_options + .entry(id) + .and_modify(|f| f.crop = Some(attr_len)) + .or_insert(FormatOptions { + highlight: false, + crop: Some(attr_len), + }); } } From 7b02fdaddc64870f935ff1a033152dad931d29ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Wed, 16 Jun 2021 14:23:08 +0200 Subject: [PATCH 25/34] Rename functions --- meilisearch-http/src/index/search.rs | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index c2e57af75..b5ba86a16 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -146,7 +146,7 @@ impl Index { .attributes_to_crop .unwrap_or_default(); - let formatted_options = parse_formatted_options( + let formatted_options = compute_formatted_options( &attr_to_highlight, &attr_to_crop, query.crop_length, @@ -176,7 +176,7 @@ impl Index { for (_id, obkv) in self.documents(&rtxn, documents_ids)? { let document = make_document(&to_retrieve_ids_sorted, &fields_ids_map, obkv)?; - let formatted = compute_formatted( + let formatted = format_fields( &fields_ids_map, obkv, &formatter, @@ -218,7 +218,7 @@ impl Index { } } -fn parse_formatted_options( +fn compute_formatted_options( attr_to_highlight: &HashSet, attr_to_crop: &[String], query_crop_length: usize, @@ -308,7 +308,7 @@ fn make_document( Ok(document) } -fn compute_formatted>( +fn format_fields>( field_ids_map: &FieldsIdsMap, obkv: obkv::KvReader, formatter: &Formatter, @@ -347,7 +347,7 @@ fn compute_formatted>( Ok(document) } -/// trait to allow unit testing of `compute_formatted` +/// trait to allow unit testing of `format_fields` trait Matcher { fn matches(&self, w: &str) -> Option; } @@ -546,7 +546,7 @@ mod test { let matching_words = MatchingWords::default(); - let value = compute_formatted( + let value = format_fields( &fields, obkv, &formatter, @@ -581,7 +581,7 @@ mod test { let matching_words = MatchingWords::default(); - let value = compute_formatted( + let value = format_fields( &fields, obkv, &formatter, @@ -623,7 +623,7 @@ mod test { let mut matching_words = HashMap::new(); matching_words.insert("hobbit", Some(6)); - let value = compute_formatted( + let value = format_fields( &fields, obkv, &formatter, @@ -666,7 +666,7 @@ mod test { let mut matching_words = HashMap::new(); matching_words.insert("hobbit", Some(3)); - let value = compute_formatted( + let value = format_fields( &fields, obkv, &formatter, @@ -709,7 +709,7 @@ mod test { let mut matching_words = HashMap::new(); matching_words.insert("potter", Some(6)); - let value = compute_formatted( + let value = format_fields( &fields, obkv, &formatter, @@ -752,7 +752,7 @@ mod test { let mut matching_words = HashMap::new(); matching_words.insert("potter", Some(6)); - let value = compute_formatted( + let value = format_fields( &fields, obkv, &formatter, @@ -795,7 +795,7 @@ mod test { let mut matching_words = HashMap::new(); matching_words.insert("potter", Some(6)); - let value = compute_formatted( + let value = format_fields( &fields, obkv, &formatter, @@ -838,7 +838,7 @@ mod test { let mut matching_words = HashMap::new(); matching_words.insert("and", Some(3)); - let value = compute_formatted( + let value = format_fields( &fields, obkv, &formatter, @@ -881,7 +881,7 @@ mod test { let mut matching_words = HashMap::new(); matching_words.insert("blood", Some(3)); - let value = compute_formatted( + let value = format_fields( &fields, obkv, &formatter, From dc5a3d4a620a1fdf5b903a3b42ffe1be292575b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Wed, 16 Jun 2021 16:18:55 +0200 Subject: [PATCH 26/34] Use BTreeSet instead of HashSet --- meilisearch-http/src/index/search.rs | 49 +++++++++++---------------- meilisearch-http/src/routes/search.rs | 4 +-- 2 files changed, 21 insertions(+), 32 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index b5ba86a16..bc9d5ac47 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -1,11 +1,10 @@ -use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque}; use std::time::Instant; use anyhow::bail; use either::Either; use heed::RoTxn; use indexmap::IndexMap; -use itertools::Itertools; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token}; use milli::{FilterCondition, FieldId, FieldsIdsMap, MatchingWords}; use serde::{Deserialize, Serialize}; @@ -32,7 +31,7 @@ pub struct SearchQuery { pub offset: Option, #[serde(default = "default_search_limit")] pub limit: usize, - pub attributes_to_retrieve: Option>, + pub attributes_to_retrieve: Option>, pub attributes_to_crop: Option>, #[serde(default = "default_crop_length")] pub crop_length: usize, @@ -101,11 +100,11 @@ impl Index { let displayed_ids = self .displayed_fields_ids(&rtxn)? - .map(|fields| fields.into_iter().collect::>()) + .map(|fields| fields.into_iter().collect::>()) .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); - let fids = |attrs: &HashSet| { - let mut ids = HashSet::new(); + let fids = |attrs: &BTreeSet| { + let mut ids = BTreeSet::new(); for attr in attrs { if attr == "*" { ids = displayed_ids.clone(); @@ -123,7 +122,7 @@ impl Index { // but these attributes must be also // - present in the fields_ids_map // - present in the the displayed attributes - let to_retrieve_ids: HashSet<_> = query + let to_retrieve_ids: BTreeSet<_> = query .attributes_to_retrieve .as_ref() .map(fids) @@ -132,12 +131,6 @@ impl Index { .cloned() .collect(); - let to_retrieve_ids_sorted: Vec<_> = to_retrieve_ids - .clone() - .into_iter() - .sorted() - .collect(); - let attr_to_highlight = query .attributes_to_highlight .unwrap_or_default(); @@ -161,13 +154,12 @@ impl Index { let ids_in_formatted = formatted_options .keys() .cloned() - .collect::>() + .collect::>() .intersection(&displayed_ids) .cloned() - .collect::>() + .collect::>() .union(&to_retrieve_ids) .cloned() - .sorted() .collect::>(); let stop_words = fst::Set::default(); @@ -175,7 +167,7 @@ impl Index { Formatter::new(&stop_words, (String::from(""), String::from(""))); for (_id, obkv) in self.documents(&rtxn, documents_ids)? { - let document = make_document(&to_retrieve_ids_sorted, &fields_ids_map, obkv)?; + let document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?; let formatted = format_fields( &fields_ids_map, obkv, @@ -223,7 +215,7 @@ fn compute_formatted_options( attr_to_crop: &[String], query_crop_length: usize, fields_ids_map: &FieldsIdsMap, - displayed_ids: &HashSet, + displayed_ids: &BTreeSet, ) -> HashMap { let mut formatted_options = HashMap::new(); @@ -286,7 +278,7 @@ fn compute_formatted_options( } fn make_document( - attributes_to_retrieve: &[FieldId], + attributes_to_retrieve: &BTreeSet, field_ids_map: &FieldsIdsMap, obkv: obkv::KvReader, ) -> anyhow::Result { @@ -327,8 +319,7 @@ fn format_fields>( value = formatter.format_value( value, matching_words, - format.highlight, - format.crop, + *format, ); } @@ -384,25 +375,24 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { &self, value: Value, matcher: &impl Matcher, - need_to_highlight: bool, - need_to_crop: Option, + format_options: FormatOptions, ) -> Value { match value { Value::String(old_string) => { let value = - self.format_string(old_string, matcher, need_to_highlight, need_to_crop); + self.format_string(old_string, matcher, format_options); Value::String(value) } Value::Array(values) => Value::Array( values .into_iter() - .map(|v| self.format_value(v, matcher, need_to_highlight, None)) + .map(|v| self.format_value(v, matcher, FormatOptions { highlight: format_options.highlight, crop: None })) .collect(), ), Value::Object(object) => Value::Object( object .into_iter() - .map(|(k, v)| (k, self.format_value(v, matcher, need_to_highlight, None))) + .map(|(k, v)| (k, self.format_value(v, matcher, FormatOptions { highlight: format_options.highlight, crop: None }))) .collect(), ), value => value, @@ -413,12 +403,11 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { &self, s: String, matcher: &impl Matcher, - need_to_highlight: bool, - need_to_crop: Option, + format_options: FormatOptions, ) -> String { let analyzed = self.analyzer.analyze(&s); - let tokens: Box> = match need_to_crop { + let tokens: Box> = match format_options.crop { Some(crop_len) => { let mut buffer = VecDeque::new(); let mut tokens = analyzed.reconstruct().peekable(); @@ -462,7 +451,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { tokens .map(|(word, token)| { - if need_to_highlight && token.is_word() && matcher.matches(token.text()).is_some() { + if format_options.highlight && token.is_word() && matcher.matches(token.text()).is_some() { let mut new_word = String::new(); new_word.push_str(&self.marks.0); if let Some(match_len) = matcher.matches(token.text()) { diff --git a/meilisearch-http/src/routes/search.rs b/meilisearch-http/src/routes/search.rs index c2c83e3c8..36f5bdf4d 100644 --- a/meilisearch-http/src/routes/search.rs +++ b/meilisearch-http/src/routes/search.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::{BTreeSet, HashSet}; use std::convert::{TryFrom, TryInto}; use actix_web::{get, post, web, HttpResponse}; @@ -36,7 +36,7 @@ impl TryFrom for SearchQuery { fn try_from(other: SearchQueryGet) -> anyhow::Result { let attributes_to_retrieve = other .attributes_to_retrieve - .map(|attrs| attrs.split(',').map(String::from).collect::>()); + .map(|attrs| attrs.split(',').map(String::from).collect::>()); let attributes_to_crop = other .attributes_to_crop From 9538790b33334f706145b2e35f0036fa0dca95cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Wed, 16 Jun 2021 17:13:21 +0200 Subject: [PATCH 27/34] Decompose into two functions --- meilisearch-http/src/index/search.rs | 37 ++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index bc9d5ac47..46e1e6e57 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -220,6 +220,30 @@ fn compute_formatted_options( let mut formatted_options = HashMap::new(); + formatted_options = add_highlight_to_formatted_options( + formatted_options, + attr_to_highlight, + fields_ids_map, + displayed_ids, + ); + + formatted_options = add_crop_to_formatted_options( + formatted_options, + attr_to_crop, + query_crop_length, + fields_ids_map, + displayed_ids, + ); + + formatted_options +} + +fn add_highlight_to_formatted_options( + mut formatted_options: HashMap, + attr_to_highlight: &HashSet, + fields_ids_map: &FieldsIdsMap, + displayed_ids: &BTreeSet, +) -> HashMap { for attr in attr_to_highlight { let new_format = FormatOptions { highlight: true, @@ -237,15 +261,24 @@ fn compute_formatted_options( formatted_options.insert(id, new_format); } } + formatted_options +} +fn add_crop_to_formatted_options( + mut formatted_options: HashMap, + attr_to_crop: &[String], + crop_length: usize, + fields_ids_map: &FieldsIdsMap, + displayed_ids: &BTreeSet, +) -> HashMap { for attr in attr_to_crop { let mut attr_name = attr.clone(); - let mut attr_len = query_crop_length; + let mut attr_len = crop_length; let mut split = attr_name.rsplitn(2, ':'); attr_name = match split.next().zip(split.next()) { Some((len, name)) => { - attr_len = len.parse().unwrap_or(query_crop_length); + attr_len = len.parse().unwrap_or(crop_length); name.to_string() }, None => attr_name, From a0b022afee3e5fd0398b06b39bb831334dd5ddfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Wed, 16 Jun 2021 17:25:02 +0200 Subject: [PATCH 28/34] Add Cow --- meilisearch-http/src/index/search.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 46e1e6e57..190d6b760 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque}; use std::time::Instant; @@ -492,9 +493,9 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { new_word.push_str(&self.marks.1); new_word.push_str(&word[match_len..]); } - new_word + Cow::Owned(new_word) } else { - word.to_string() + Cow::Borrowed(word) } }) .collect::() From 97909ce56e8971027c5850a722583ca7e003779e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Wed, 16 Jun 2021 19:30:06 +0200 Subject: [PATCH 29/34] Use BTreeMap and remove ids_in_formatted --- meilisearch-http/src/index/search.rs | 251 ++++++++++----------------- 1 file changed, 90 insertions(+), 161 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 190d6b760..40ac1b1d9 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -1,5 +1,5 @@ use std::borrow::Cow; -use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque}; +use std::collections::{BTreeMap, BTreeSet, HashSet, VecDeque}; use std::time::Instant; use anyhow::bail; @@ -120,9 +120,9 @@ impl Index { }; // The attributes to retrieve are the ones explicitly marked as to retrieve (all by default), - // but these attributes must be also - // - present in the fields_ids_map - // - present in the the displayed attributes + // but these attributes must be also be present + // - in the fields_ids_map + // - in the the displayed attributes let to_retrieve_ids: BTreeSet<_> = query .attributes_to_retrieve .as_ref() @@ -140,29 +140,20 @@ impl Index { .attributes_to_crop .unwrap_or_default(); + // Attributes in `formatted_options` correspond to the attributes that will be in `_formatted` + // These attributes are: + // - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`) + // - the attributes asked to be retrieved: these attributes will not be highlighted/cropped + // But these attributes must be present in displayed attributes let formatted_options = compute_formatted_options( &attr_to_highlight, &attr_to_crop, query.crop_length, + &to_retrieve_ids, &fields_ids_map, &displayed_ids, ); - // All attributes present in `_formatted` are: - // - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`) - // - the attributes asked to be retrieved: these attributes will not be highlighted/cropped - // But these attributes must be present in displayed attributes - let ids_in_formatted = formatted_options - .keys() - .cloned() - .collect::>() - .intersection(&displayed_ids) - .cloned() - .collect::>() - .union(&to_retrieve_ids) - .cloned() - .collect::>(); - let stop_words = fst::Set::default(); let formatter = Formatter::new(&stop_words, (String::from(""), String::from(""))); @@ -174,7 +165,6 @@ impl Index { obkv, &formatter, &matching_words, - &ids_in_formatted, &formatted_options, )?; let hit = SearchHit { @@ -215,11 +205,12 @@ fn compute_formatted_options( attr_to_highlight: &HashSet, attr_to_crop: &[String], query_crop_length: usize, + to_retrieve_ids: &BTreeSet, fields_ids_map: &FieldsIdsMap, displayed_ids: &BTreeSet, - ) -> HashMap { + ) -> BTreeMap { - let mut formatted_options = HashMap::new(); + let mut formatted_options = BTreeMap::new(); formatted_options = add_highlight_to_formatted_options( formatted_options, @@ -236,15 +227,23 @@ fn compute_formatted_options( displayed_ids, ); + // Should not return `_formatted` if no valid attributes to highlight/crop + if !formatted_options.is_empty() { + formatted_options = add_non_formatted_ids_to_formatted_options( + formatted_options, + to_retrieve_ids, + ); + } + formatted_options } fn add_highlight_to_formatted_options( - mut formatted_options: HashMap, + mut formatted_options: BTreeMap, attr_to_highlight: &HashSet, fields_ids_map: &FieldsIdsMap, displayed_ids: &BTreeSet, -) -> HashMap { +) -> BTreeMap { for attr in attr_to_highlight { let new_format = FormatOptions { highlight: true, @@ -259,19 +258,22 @@ fn add_highlight_to_formatted_options( } if let Some(id) = fields_ids_map.id(&attr) { - formatted_options.insert(id, new_format); + if displayed_ids.contains(&id) { + formatted_options.insert(id, new_format); + } } } + formatted_options } fn add_crop_to_formatted_options( - mut formatted_options: HashMap, + mut formatted_options: BTreeMap, attr_to_crop: &[String], crop_length: usize, fields_ids_map: &FieldsIdsMap, displayed_ids: &BTreeSet, -) -> HashMap { +) -> BTreeMap { for attr in attr_to_crop { let mut attr_name = attr.clone(); let mut attr_len = crop_length; @@ -298,19 +300,37 @@ fn add_crop_to_formatted_options( } if let Some(id) = fields_ids_map.id(&attr_name) { - formatted_options - .entry(id) - .and_modify(|f| f.crop = Some(attr_len)) - .or_insert(FormatOptions { - highlight: false, - crop: Some(attr_len), - }); + if displayed_ids.contains(&id) { + formatted_options + .entry(id) + .and_modify(|f| f.crop = Some(attr_len)) + .or_insert(FormatOptions { + highlight: false, + crop: Some(attr_len), + }); + } } } formatted_options } +fn add_non_formatted_ids_to_formatted_options( + mut formatted_options: BTreeMap, + to_retrieve_ids: &BTreeSet +) -> BTreeMap { + for id in to_retrieve_ids { + formatted_options + .entry(*id) + .or_insert(FormatOptions { + highlight: false, + crop: None, + }); + } + + formatted_options +} + fn make_document( attributes_to_retrieve: &BTreeSet, field_ids_map: &FieldsIdsMap, @@ -339,33 +359,28 @@ fn format_fields>( obkv: obkv::KvReader, formatter: &Formatter, matching_words: &impl Matcher, - ids_in_formatted: &[FieldId], - formatted_options: &HashMap, + formatted_options: &BTreeMap, ) -> anyhow::Result { let mut document = Document::new(); - if !formatted_options.is_empty() { - for field in ids_in_formatted { - if let Some(value) = obkv.get(*field) { - let mut value: Value = serde_json::from_slice(value)?; + for (id, format) in formatted_options { + if let Some(value) = obkv.get(*id) { + let mut value: Value = serde_json::from_slice(value)?; - if let Some(format) = formatted_options.get(field) { - value = formatter.format_value( - value, - matching_words, - *format, - ); - } + value = formatter.format_value( + value, + matching_words, + *format, + ); - // This unwrap must be safe since we got the ids from the fields_ids_map just - // before. - let key = field_ids_map - .name(*field) - .expect("Missing field name") - .to_string(); + // This unwrap must be safe since we got the ids from the fields_ids_map just + // before. + let key = field_ids_map + .name(*id) + .expect("Missing field name") + .to_string(); - document.insert(key, value); - } + document.insert(key, value); } } @@ -378,7 +393,7 @@ trait Matcher { } #[cfg(test)] -impl Matcher for HashMap<&str, Option> { +impl Matcher for BTreeMap<&str, Option> { fn matches(&self, w: &str) -> Option { self.get(w).cloned().flatten() } @@ -564,8 +579,7 @@ mod test { let obkv = obkv::KvReader::new(&buf); - let ids_in_formatted = Vec::new(); - let formatted_options = HashMap::new(); + let formatted_options = BTreeMap::new(); let matching_words = MatchingWords::default(); @@ -574,7 +588,6 @@ mod test { obkv, &formatter, &matching_words, - &ids_in_formatted, &formatted_options, ) .unwrap(); @@ -582,84 +595,6 @@ mod test { assert!(value.is_empty()); } - #[test] - fn no_formatted_with_ids() { - let stop_words = fst::Set::default(); - let formatter = - Formatter::new(&stop_words, (String::from(""), String::from(""))); - - let mut fields = FieldsIdsMap::new(); - let id = fields.insert("test").unwrap(); - - let mut buf = Vec::new(); - let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert(id, Value::String("hello".into()).to_string().as_bytes()) - .unwrap(); - obkv.finish().unwrap(); - - let obkv = obkv::KvReader::new(&buf); - - let ids_in_formatted = vec![id]; - let formatted_options = HashMap::new(); - - let matching_words = MatchingWords::default(); - - let value = format_fields( - &fields, - obkv, - &formatter, - &matching_words, - &ids_in_formatted, - &formatted_options, - ) - .unwrap(); - - assert!(value.is_empty()); - } - - #[test] - fn formatted_with_highlight() { - let stop_words = fst::Set::default(); - let formatter = - Formatter::new(&stop_words, (String::from(""), String::from(""))); - - let mut fields = FieldsIdsMap::new(); - let title = fields.insert("title").unwrap(); - let author = fields.insert("author").unwrap(); - - let mut buf = Vec::new(); - let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert(title, Value::String("The Hobbit".into()).to_string().as_bytes()) - .unwrap(); - obkv.finish().unwrap(); - obkv = obkv::KvWriter::new(&mut buf); - obkv.insert(author, Value::String("J. R. R. Tolkien".into()).to_string().as_bytes()) - .unwrap(); - obkv.finish().unwrap(); - - let obkv = obkv::KvReader::new(&buf); - - let ids_in_formatted = vec![title, author]; - let mut formatted_options = HashMap::new(); - formatted_options.insert(title, FormatOptions { highlight: true, crop: None }); - - let mut matching_words = HashMap::new(); - matching_words.insert("hobbit", Some(6)); - - let value = format_fields( - &fields, - obkv, - &formatter, - &matching_words, - &ids_in_formatted, - &formatted_options, - ) - .unwrap(); - - assert_eq!(value["title"], "The Hobbit"); - assert_eq!(value["author"], "J. R. R. Tolkien"); - } - #[test] fn formatted_with_highlight_in_word() { let stop_words = fst::Set::default(); @@ -682,11 +617,11 @@ mod test { let obkv = obkv::KvReader::new(&buf); - let ids_in_formatted = vec![title, author]; - let mut formatted_options = HashMap::new(); + let mut formatted_options = BTreeMap::new(); formatted_options.insert(title, FormatOptions { highlight: true, crop: None }); + formatted_options.insert(author, FormatOptions { highlight: false, crop: None }); - let mut matching_words = HashMap::new(); + let mut matching_words = BTreeMap::new(); matching_words.insert("hobbit", Some(3)); let value = format_fields( @@ -694,7 +629,6 @@ mod test { obkv, &formatter, &matching_words, - &ids_in_formatted, &formatted_options, ) .unwrap(); @@ -725,11 +659,11 @@ mod test { let obkv = obkv::KvReader::new(&buf); - let ids_in_formatted = vec![title, author]; - let mut formatted_options = HashMap::new(); + let mut formatted_options = BTreeMap::new(); formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(2) }); + formatted_options.insert(author, FormatOptions { highlight: false, crop: None }); - let mut matching_words = HashMap::new(); + let mut matching_words = BTreeMap::new(); matching_words.insert("potter", Some(6)); let value = format_fields( @@ -737,7 +671,6 @@ mod test { obkv, &formatter, &matching_words, - &ids_in_formatted, &formatted_options, ) .unwrap(); @@ -768,11 +701,11 @@ mod test { let obkv = obkv::KvReader::new(&buf); - let ids_in_formatted = vec![title, author]; - let mut formatted_options = HashMap::new(); + let mut formatted_options = BTreeMap::new(); formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(10) }); + formatted_options.insert(author, FormatOptions { highlight: false, crop: None }); - let mut matching_words = HashMap::new(); + let mut matching_words = BTreeMap::new(); matching_words.insert("potter", Some(6)); let value = format_fields( @@ -780,7 +713,6 @@ mod test { obkv, &formatter, &matching_words, - &ids_in_formatted, &formatted_options, ) .unwrap(); @@ -811,11 +743,11 @@ mod test { let obkv = obkv::KvReader::new(&buf); - let ids_in_formatted = vec![title, author]; - let mut formatted_options = HashMap::new(); + let mut formatted_options = BTreeMap::new(); formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(0) }); + formatted_options.insert(author, FormatOptions { highlight: false, crop: None }); - let mut matching_words = HashMap::new(); + let mut matching_words = BTreeMap::new(); matching_words.insert("potter", Some(6)); let value = format_fields( @@ -823,7 +755,6 @@ mod test { obkv, &formatter, &matching_words, - &ids_in_formatted, &formatted_options, ) .unwrap(); @@ -854,11 +785,11 @@ mod test { let obkv = obkv::KvReader::new(&buf); - let ids_in_formatted = vec![title, author]; - let mut formatted_options = HashMap::new(); + let mut formatted_options = BTreeMap::new(); formatted_options.insert(title, FormatOptions { highlight: true, crop: Some(1) }); + formatted_options.insert(author, FormatOptions { highlight: false, crop: None }); - let mut matching_words = HashMap::new(); + let mut matching_words = BTreeMap::new(); matching_words.insert("and", Some(3)); let value = format_fields( @@ -866,7 +797,6 @@ mod test { obkv, &formatter, &matching_words, - &ids_in_formatted, &formatted_options, ) .unwrap(); @@ -897,11 +827,11 @@ mod test { let obkv = obkv::KvReader::new(&buf); - let ids_in_formatted = vec![title, author]; - let mut formatted_options = HashMap::new(); + let mut formatted_options = BTreeMap::new(); formatted_options.insert(title, FormatOptions { highlight: true, crop: Some(9) }); + formatted_options.insert(author, FormatOptions { highlight: false, crop: None }); - let mut matching_words = HashMap::new(); + let mut matching_words = BTreeMap::new(); matching_words.insert("blood", Some(3)); let value = format_fields( @@ -909,7 +839,6 @@ mod test { obkv, &formatter, &matching_words, - &ids_in_formatted, &formatted_options, ) .unwrap(); From 9543ab4db6bf0ed36b1aa5666b54fdb30cd21472 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Thu, 17 Jun 2021 13:50:49 +0200 Subject: [PATCH 30/34] Use mut instead of returning the hashmap --- meilisearch-http/src/index/search.rs | 32 +++++++++++----------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 40ac1b1d9..e5eb66229 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -144,7 +144,7 @@ impl Index { // These attributes are: // - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`) // - the attributes asked to be retrieved: these attributes will not be highlighted/cropped - // But these attributes must be present in displayed attributes + // But these attributes must be also present in displayed attributes let formatted_options = compute_formatted_options( &attr_to_highlight, &attr_to_crop, @@ -212,15 +212,15 @@ fn compute_formatted_options( let mut formatted_options = BTreeMap::new(); - formatted_options = add_highlight_to_formatted_options( - formatted_options, + add_highlight_to_formatted_options( + &mut formatted_options, attr_to_highlight, fields_ids_map, displayed_ids, ); - formatted_options = add_crop_to_formatted_options( - formatted_options, + add_crop_to_formatted_options( + &mut formatted_options, attr_to_crop, query_crop_length, fields_ids_map, @@ -229,8 +229,8 @@ fn compute_formatted_options( // Should not return `_formatted` if no valid attributes to highlight/crop if !formatted_options.is_empty() { - formatted_options = add_non_formatted_ids_to_formatted_options( - formatted_options, + add_non_formatted_ids_to_formatted_options( + &mut formatted_options, to_retrieve_ids, ); } @@ -239,11 +239,11 @@ fn compute_formatted_options( } fn add_highlight_to_formatted_options( - mut formatted_options: BTreeMap, + formatted_options: &mut BTreeMap, attr_to_highlight: &HashSet, fields_ids_map: &FieldsIdsMap, displayed_ids: &BTreeSet, -) -> BTreeMap { +) { for attr in attr_to_highlight { let new_format = FormatOptions { highlight: true, @@ -263,17 +263,15 @@ fn add_highlight_to_formatted_options( } } } - - formatted_options } fn add_crop_to_formatted_options( - mut formatted_options: BTreeMap, + formatted_options: &mut BTreeMap, attr_to_crop: &[String], crop_length: usize, fields_ids_map: &FieldsIdsMap, displayed_ids: &BTreeSet, -) -> BTreeMap { +) { for attr in attr_to_crop { let mut attr_name = attr.clone(); let mut attr_len = crop_length; @@ -311,14 +309,12 @@ fn add_crop_to_formatted_options( } } } - - formatted_options } fn add_non_formatted_ids_to_formatted_options( - mut formatted_options: BTreeMap, + formatted_options: &mut BTreeMap, to_retrieve_ids: &BTreeSet -) -> BTreeMap { +) { for id in to_retrieve_ids { formatted_options .entry(*id) @@ -327,8 +323,6 @@ fn add_non_formatted_ids_to_formatted_options( crop: None, }); } - - formatted_options } fn make_document( From 33e55bd82e8ecbe2514aea5542a9579c98609f24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Thu, 17 Jun 2021 16:59:01 +0200 Subject: [PATCH 31/34] Refactor the crop --- meilisearch-http/src/index/search.rs | 128 ++++++++++++++++++--------- 1 file changed, 87 insertions(+), 41 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index e5eb66229..51dc1dd85 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -1,5 +1,5 @@ use std::borrow::Cow; -use std::collections::{BTreeMap, BTreeSet, HashSet, VecDeque}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::time::Instant; use anyhow::bail; @@ -273,16 +273,13 @@ fn add_crop_to_formatted_options( displayed_ids: &BTreeSet, ) { for attr in attr_to_crop { - let mut attr_name = attr.clone(); - let mut attr_len = crop_length; - - let mut split = attr_name.rsplitn(2, ':'); - attr_name = match split.next().zip(split.next()) { + let mut split = attr.rsplitn(2, ':'); + let (attr_name, attr_len) = match split.next().zip(split.next()) { Some((len, name)) => { - attr_len = len.parse().unwrap_or(crop_length); - name.to_string() + let crop_len = len.parse::().unwrap_or(crop_length); + (name, crop_len) }, - None => attr_name, + None => (attr.as_str(), crop_length), }; if attr_name == "*" { @@ -452,42 +449,49 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { let tokens: Box> = match format_options.crop { Some(crop_len) => { - let mut buffer = VecDeque::new(); + let mut buffer = Vec::new(); let mut tokens = analyzed.reconstruct().peekable(); - let mut taken_before = 0; + while let Some((word, token)) = tokens.next_if(|(_, token)| matcher.matches(token.text()).is_none()) { - buffer.push_back((word, token)); - taken_before += word.chars().count(); - while taken_before > crop_len { - // Around to the previous word - if let Some((word, _)) = buffer.front() { - if taken_before - word.chars().count() < crop_len { - break; - } - } - if let Some((word, _)) = buffer.pop_front() { - taken_before -= word.chars().count(); - } + buffer.push((word, token)); + } + + match tokens.next() { + Some(token) => { + let mut total_len: usize = buffer.iter().map(|(word, _)| word.len()).sum(); + let before_iter = buffer.into_iter().skip_while(move |(word, _)| { + total_len -= word.len(); + let take = total_len >= crop_len; + take + }); + + let mut taken_after = 0; + let after_iter = tokens + .take_while(move |(word, _)| { + let take = taken_after < crop_len; + taken_after += word.chars().count(); + take + }); + + let iter = before_iter + .chain(Some(token)) + .chain(after_iter); + + Box::new(iter) + + }, + // If no word matches in the attribute + None => { + let mut count = 0; + let iter = buffer.into_iter().take_while(move |(word, _)| { + let take = count < crop_len; + count += word.len(); + take + }); + + Box::new(iter) } } - - if let Some(token) = tokens.next() { - buffer.push_back(token); - } - - let mut taken_after = 0; - let after_iter = tokens - .take_while(move |(word, _)| { - let take = taken_after < crop_len; - taken_after += word.chars().count(); - take - }); - - let iter = buffer - .into_iter() - .chain(after_iter); - - Box::new(iter) } None => Box::new(analyzed.reconstruct()), }; @@ -757,6 +761,48 @@ mod test { assert_eq!(value["author"], "J. K. Rowling"); } + #[test] + fn formatted_with_crop_and_no_match() { + let stop_words = fst::Set::default(); + let formatter = + Formatter::new(&stop_words, (String::from(""), String::from(""))); + + let mut fields = FieldsIdsMap::new(); + let title = fields.insert("title").unwrap(); + let author = fields.insert("author").unwrap(); + + let mut buf = Vec::new(); + let mut obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(title, Value::String("Harry Potter and the Half-Blood Prince".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + obkv = obkv::KvWriter::new(&mut buf); + obkv.insert(author, Value::String("J. K. Rowling".into()).to_string().as_bytes()) + .unwrap(); + obkv.finish().unwrap(); + + let obkv = obkv::KvReader::new(&buf); + + let mut formatted_options = BTreeMap::new(); + formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(6) }); + formatted_options.insert(author, FormatOptions { highlight: false, crop: Some(20) }); + + let mut matching_words = BTreeMap::new(); + matching_words.insert("rowling", Some(3)); + + let value = format_fields( + &fields, + obkv, + &formatter, + &matching_words, + &formatted_options, + ) + .unwrap(); + + assert_eq!(value["title"], "Harry "); + assert_eq!(value["author"], "J. K. Rowling"); + } + #[test] fn formatted_with_crop_and_highlight() { let stop_words = fst::Set::default(); From e4b3d35ed8e45b18f3ea881575ff0a9cac98d3c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Thu, 17 Jun 2021 17:03:43 +0200 Subject: [PATCH 32/34] Fix clippy errors --- meilisearch-http/src/index/search.rs | 3 +-- meilisearch-http/src/index_controller/index_actor/message.rs | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 51dc1dd85..00ada864f 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -461,8 +461,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { let mut total_len: usize = buffer.iter().map(|(word, _)| word.len()).sum(); let before_iter = buffer.into_iter().skip_while(move |(word, _)| { total_len -= word.len(); - let take = total_len >= crop_len; - take + total_len >= crop_len }); let mut taken_after = 0; diff --git a/meilisearch-http/src/index_controller/index_actor/message.rs b/meilisearch-http/src/index_controller/index_actor/message.rs index 377b2c333..e7304d56c 100644 --- a/meilisearch-http/src/index_controller/index_actor/message.rs +++ b/meilisearch-http/src/index_controller/index_actor/message.rs @@ -8,6 +8,7 @@ use crate::index_controller::{Failed, IndexStats, Processed, Processing}; use super::{IndexMeta, IndexResult, IndexSettings}; +#[allow(clippy::large_enum_variant)] pub enum IndexMsg { CreateIndex { uuid: Uuid, From c5c7e76805236a36d67754cb76b19cdfade2311e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Thu, 17 Jun 2021 18:00:02 +0200 Subject: [PATCH 33/34] Update meilisearch-http/src/index/search.rs Co-authored-by: marin --- meilisearch-http/src/index/search.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 00ada864f..3261e9a37 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -477,7 +477,6 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { .chain(after_iter); Box::new(iter) - }, // If no word matches in the attribute None => { From 623b71e81e5f079bfade7e7d84686075873ff414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Thu, 17 Jun 2021 18:02:25 +0200 Subject: [PATCH 34/34] Fix clippy errors --- meilisearch-http/src/routes/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index beddf7ee3..4c5ebbe21 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -17,7 +17,7 @@ pub mod settings; pub mod stats; #[derive(Debug, Clone, Serialize, Deserialize)] -#[allow(clippy::clippy::large_enum_variant)] +#[allow(clippy::large_enum_variant)] #[serde(tag = "name")] pub enum UpdateType { ClearAll,