From 0da8fa115e90302e3ccbd62dccd6a94b2bfc8272 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Tue, 8 Jun 2021 17:33:20 +0200 Subject: [PATCH] Add custom croplength for attributes to crop --- meilisearch-http/src/index/search.rs | 61 +++++++++++++++++++-------- meilisearch-http/src/routes/search.rs | 2 +- 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 429b5582a..603ebf1b3 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -16,16 +16,16 @@ use super::Index; pub type Document = IndexMap; -// pub const DEFAULT_CROP_LENGTH: usize = 5; -// const fn default_crop_length() -> Option { -// Some(DEFAULT_CROP_LENGTH) -// } - pub const DEFAULT_SEARCH_LIMIT: usize = 20; const fn default_search_limit() -> usize { DEFAULT_SEARCH_LIMIT } +pub const DEFAULT_CROP_LENGTH: usize = 200; +const fn default_crop_length() -> Option { + Some(DEFAULT_CROP_LENGTH) +} + #[derive(Deserialize)] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct SearchQuery { @@ -34,8 +34,8 @@ pub struct SearchQuery { #[serde(default = "default_search_limit")] pub limit: usize, pub attributes_to_retrieve: Option>, - pub attributes_to_crop: Option>, - // #[serde(default = "default_crop_length")] + pub attributes_to_crop: Option>, + #[serde(default = "default_crop_length")] pub crop_length: Option, pub attributes_to_highlight: Option>, pub matches: Option, @@ -126,12 +126,45 @@ impl Index { .map(fids) .unwrap_or_default(); - let to_crop_ids = query + let to_crop_ids_length = query .attributes_to_crop .as_ref() - .map(fids) + .map(|attributes: &Vec| { + let mut ids_length_crop = HashMap::new(); + for attribute in attributes { + let mut attr_name = attribute.clone(); + let mut attr_len = query.crop_length; + + if attr_name.contains(":") { + let mut split = attr_name.rsplit(':'); + attr_len = match split.nth(0) { + Some(s) => s.parse::().ok(), + None => None, + }; + attr_name = split.flat_map(|s| s.chars()).collect(); + } + + if attr_name == "*" { + let ids = displayed_ids.clone(); + for id in ids { + ids_length_crop.insert(id, attr_len); + } + } + + if let Some(id) = fields_ids_map.id(&attr_name) { + ids_length_crop.insert(id, attr_len); + } + } + ids_length_crop + }) .unwrap_or_default(); + let to_crop_ids = to_crop_ids_length + .clone() + .into_iter() + .map(|(k, _)| k) + .collect::>(); + // The attributes to retrieve are: // - the ones explicitly marked as to retrieve that are also in the displayed attributes let all_attributes: Vec<_> = to_retrieve_ids @@ -164,12 +197,6 @@ impl Index { let highlighter = Formatter::new(&stop_words, (String::from(""), String::from(""))); - let to_crop = to_crop_ids - .into_iter() - .map(|id| (id, query.crop_length)) - // .map(|id| (id, Some(5))) - .collect::>(); - for (_id, obkv) in self.documents(&rtxn, documents_ids)? { let document = make_document(&all_attributes, &fields_ids_map, obkv)?; let formatted = compute_formatted( @@ -179,7 +206,7 @@ impl Index { &matching_words, all_formatted.as_ref().as_slice(), &to_highlight_ids, - &to_crop, + &to_crop_ids_length, )?; let hit = SearchHit { document, @@ -352,7 +379,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { while let Some((word, token)) = tokens.next_if(|(_, token)| !matcher.matches(token.text())) { buffer.push_back((word, token)); taken_before += word.chars().count(); - while taken_before >= crop_len { + while taken_before > crop_len { // Around to the previous word if let Some((word, _)) = buffer.front() { if taken_before - word.chars().count() < crop_len { diff --git a/meilisearch-http/src/routes/search.rs b/meilisearch-http/src/routes/search.rs index be06960cf..8489215e7 100644 --- a/meilisearch-http/src/routes/search.rs +++ b/meilisearch-http/src/routes/search.rs @@ -40,7 +40,7 @@ impl TryFrom for SearchQuery { let attributes_to_crop = other .attributes_to_crop - .map(|attrs| attrs.split(',').map(String::from).collect::>()); + .map(|attrs| attrs.split(',').map(String::from).collect::>()); let attributes_to_highlight = other .attributes_to_highlight