From dc5a3d4a620a1fdf5b903a3b42ffe1be292575b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Wed, 16 Jun 2021 16:18:55 +0200 Subject: [PATCH] Use BTreeSet instead of HashSet --- meilisearch-http/src/index/search.rs | 49 +++++++++++---------------- meilisearch-http/src/routes/search.rs | 4 +-- 2 files changed, 21 insertions(+), 32 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index b5ba86a16..bc9d5ac47 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -1,11 +1,10 @@ -use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque}; use std::time::Instant; use anyhow::bail; use either::Either; use heed::RoTxn; use indexmap::IndexMap; -use itertools::Itertools; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token}; use milli::{FilterCondition, FieldId, FieldsIdsMap, MatchingWords}; use serde::{Deserialize, Serialize}; @@ -32,7 +31,7 @@ pub struct SearchQuery { pub offset: Option, #[serde(default = "default_search_limit")] pub limit: usize, - pub attributes_to_retrieve: Option>, + pub attributes_to_retrieve: Option>, pub attributes_to_crop: Option>, #[serde(default = "default_crop_length")] pub crop_length: usize, @@ -101,11 +100,11 @@ impl Index { let displayed_ids = self .displayed_fields_ids(&rtxn)? - .map(|fields| fields.into_iter().collect::>()) + .map(|fields| fields.into_iter().collect::>()) .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); - let fids = |attrs: &HashSet| { - let mut ids = HashSet::new(); + let fids = |attrs: &BTreeSet| { + let mut ids = BTreeSet::new(); for attr in attrs { if attr == "*" { ids = displayed_ids.clone(); @@ -123,7 +122,7 @@ impl Index { // but these attributes must be also // - present in the fields_ids_map // - present in the the displayed attributes - let to_retrieve_ids: HashSet<_> = query + let to_retrieve_ids: BTreeSet<_> = query .attributes_to_retrieve .as_ref() .map(fids) @@ -132,12 +131,6 @@ impl Index { .cloned() .collect(); - let to_retrieve_ids_sorted: Vec<_> = to_retrieve_ids - .clone() - .into_iter() - .sorted() - .collect(); - let attr_to_highlight = query .attributes_to_highlight .unwrap_or_default(); @@ -161,13 +154,12 @@ impl Index { let ids_in_formatted = formatted_options .keys() .cloned() - .collect::>() + .collect::>() .intersection(&displayed_ids) .cloned() - .collect::>() + .collect::>() .union(&to_retrieve_ids) .cloned() - .sorted() .collect::>(); let stop_words = fst::Set::default(); @@ -175,7 +167,7 @@ impl Index { Formatter::new(&stop_words, (String::from(""), String::from(""))); for (_id, obkv) in self.documents(&rtxn, documents_ids)? { - let document = make_document(&to_retrieve_ids_sorted, &fields_ids_map, obkv)?; + let document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?; let formatted = format_fields( &fields_ids_map, obkv, @@ -223,7 +215,7 @@ fn compute_formatted_options( attr_to_crop: &[String], query_crop_length: usize, fields_ids_map: &FieldsIdsMap, - displayed_ids: &HashSet, + displayed_ids: &BTreeSet, ) -> HashMap { let mut formatted_options = HashMap::new(); @@ -286,7 +278,7 @@ fn compute_formatted_options( } fn make_document( - attributes_to_retrieve: &[FieldId], + attributes_to_retrieve: &BTreeSet, field_ids_map: &FieldsIdsMap, obkv: obkv::KvReader, ) -> anyhow::Result { @@ -327,8 +319,7 @@ fn format_fields>( value = formatter.format_value( value, matching_words, - format.highlight, - format.crop, + *format, ); } @@ -384,25 +375,24 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { &self, value: Value, matcher: &impl Matcher, - need_to_highlight: bool, - need_to_crop: Option, + format_options: FormatOptions, ) -> Value { match value { Value::String(old_string) => { let value = - self.format_string(old_string, matcher, need_to_highlight, need_to_crop); + self.format_string(old_string, matcher, format_options); Value::String(value) } Value::Array(values) => Value::Array( values .into_iter() - .map(|v| self.format_value(v, matcher, need_to_highlight, None)) + .map(|v| self.format_value(v, matcher, FormatOptions { highlight: format_options.highlight, crop: None })) .collect(), ), Value::Object(object) => Value::Object( object .into_iter() - .map(|(k, v)| (k, self.format_value(v, matcher, need_to_highlight, None))) + .map(|(k, v)| (k, self.format_value(v, matcher, FormatOptions { highlight: format_options.highlight, crop: None }))) .collect(), ), value => value, @@ -413,12 +403,11 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { &self, s: String, matcher: &impl Matcher, - need_to_highlight: bool, - need_to_crop: Option, + format_options: FormatOptions, ) -> String { let analyzed = self.analyzer.analyze(&s); - let tokens: Box> = match need_to_crop { + let tokens: Box> = match format_options.crop { Some(crop_len) => { let mut buffer = VecDeque::new(); let mut tokens = analyzed.reconstruct().peekable(); @@ -462,7 +451,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { tokens .map(|(word, token)| { - if need_to_highlight && token.is_word() && matcher.matches(token.text()).is_some() { + if format_options.highlight && token.is_word() && matcher.matches(token.text()).is_some() { let mut new_word = String::new(); new_word.push_str(&self.marks.0); if let Some(match_len) = matcher.matches(token.text()) { diff --git a/meilisearch-http/src/routes/search.rs b/meilisearch-http/src/routes/search.rs index c2c83e3c8..36f5bdf4d 100644 --- a/meilisearch-http/src/routes/search.rs +++ b/meilisearch-http/src/routes/search.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::{BTreeSet, HashSet}; use std::convert::{TryFrom, TryInto}; use actix_web::{get, post, web, HttpResponse}; @@ -36,7 +36,7 @@ impl TryFrom for SearchQuery { fn try_from(other: SearchQueryGet) -> anyhow::Result { let attributes_to_retrieve = other .attributes_to_retrieve - .map(|attrs| attrs.split(',').map(String::from).collect::>()); + .map(|attrs| attrs.split(',').map(String::from).collect::>()); let attributes_to_crop = other .attributes_to_crop