From 521c96354f5065b0538b0ccd5e76a2c250092aab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 15 Nov 2019 12:04:46 +0100 Subject: [PATCH] Improve the highlight formatted outputs --- meilidb-http/src/error.rs | 9 +++ meilidb-http/src/helpers/meilidb.rs | 98 ++++++++++++++--------------- meilidb-http/src/routes/search.rs | 37 ++++++++--- 3 files changed, 84 insertions(+), 60 deletions(-) diff --git a/meilidb-http/src/error.rs b/meilidb-http/src/error.rs index a0860fcd4..afefdec3a 100644 --- a/meilidb-http/src/error.rs +++ b/meilidb-http/src/error.rs @@ -17,6 +17,7 @@ pub enum ResponseError { DocumentNotFound(String), MissingHeader(String), BadParameter(String, String), + OpenIndex(String), CreateIndex(String), Maintenance, } @@ -54,6 +55,10 @@ impl ResponseError { ResponseError::BadParameter(name.to_string(), message.to_string()) } + pub fn open_index(message: impl Display) -> ResponseError { + ResponseError::OpenIndex(message.to_string()) + } + pub fn create_index(message: impl Display) -> ResponseError { ResponseError::CreateIndex(message.to_string()) } @@ -96,6 +101,10 @@ impl IntoResponse for ResponseError { format!("Impossible to create index; {}", err), StatusCode::BAD_REQUEST, ), + ResponseError::OpenIndex(err) => error( + format!("Impossible to open index; {}", err), + StatusCode::BAD_REQUEST, + ), ResponseError::Maintenance => error( String::from("Server is in maintenance, please try again later"), StatusCode::SERVICE_UNAVAILABLE, diff --git a/meilidb-http/src/helpers/meilidb.rs b/meilidb-http/src/helpers/meilidb.rs index 8ad202a7f..d00ba29be 100644 --- a/meilidb-http/src/helpers/meilidb.rs +++ b/meilidb-http/src/helpers/meilidb.rs @@ -235,43 +235,35 @@ impl<'a> SearchBuilder<'a> { } fields = Some(set); } - let mut document: IndexMap = self + + let document: IndexMap = self .index .document(reader, fields.as_ref(), doc.id) .map_err(|e| Error::RetrieveDocument(doc.id.0, e.to_string()))? .ok_or(Error::DocumentNotFound(doc.id.0))?; + let mut formatted = document.clone(); let mut matches = doc.highlights.clone(); // Crops fields if needed - if let Some(fields) = self.attributes_to_crop.clone() { - for (field, length) in fields { - let _ = crop_document(&mut document, &mut matches, &schema, &field, length); - } + if let Some(fields) = &self.attributes_to_crop { + crop_document(&mut formatted, &mut matches, &schema, fields); } // Transform to readable matches let matches = calculate_matches(matches, self.attributes_to_retrieve.clone(), &schema); if !self.matches { - if let Some(attributes_to_highlight) = self.attributes_to_highlight.clone() { - let highlights = calculate_highlights( - document.clone(), - matches.clone(), - attributes_to_highlight, - ); - for (key, value) in highlights { - if let Some(content) = document.get_mut(&key) { - *content = value; - } - } + if let Some(attributes_to_highlight) = &self.attributes_to_highlight { + formatted = calculate_highlights(&formatted, &matches, attributes_to_highlight); } } let matches_info = if self.matches { Some(matches) } else { None }; let hit = SearchHit { - hit: document, + document, + formatted, matches_info, }; @@ -388,7 +380,9 @@ pub type MatchesInfos = HashMap>; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SearchHit { #[serde(flatten)] - pub hit: IndexMap, + pub document: IndexMap, + #[serde(rename = "_formatted", skip_serializing_if = "IndexMap::is_empty")] + pub formatted: IndexMap, #[serde(rename = "_matchesInfo", skip_serializing_if = "Option::is_none")] pub matches_info: Option, } @@ -431,32 +425,31 @@ fn crop_document( document: &mut IndexMap, matches: &mut Vec, schema: &Schema, - field: &str, - length: usize, -) -> Result<(), Error> { + fields: &HashMap, +) { matches.sort_unstable_by_key(|m| (m.char_index, m.char_length)); - let attribute = schema - .attribute(field) - .ok_or(Error::AttributeNotFoundOnSchema(field.to_string()))?; - let selected_matches = matches - .iter() - .filter(|m| SchemaAttr::new(m.attribute) == attribute) - .cloned(); - let original_text = match document.get(field) { - Some(Value::String(text)) => text, - Some(_) => return Err(Error::CropFieldWrongType(field.to_string())), - None => return Err(Error::AttributeNotFoundOnDocument(field.to_string())), - }; - let (cropped_text, cropped_matches) = crop_text(&original_text, selected_matches, length); + for (field, length) in fields { + let attribute = match schema.attribute(field) { + Some(attribute) => attribute, + None => continue, + }; - document.insert( - field.to_string(), - serde_json::value::Value::String(cropped_text), - ); - matches.retain(|m| SchemaAttr::new(m.attribute) != attribute); - matches.extend_from_slice(&cropped_matches); - Ok(()) + let selected_matches = matches + .iter() + .filter(|m| SchemaAttr::new(m.attribute) == attribute) + .cloned(); + + if let Some(Value::String(ref mut original_text)) = document.get_mut(field) { + let (cropped_text, cropped_matches) = + crop_text(original_text, selected_matches, *length); + + *original_text = cropped_text; + + matches.retain(|m| SchemaAttr::new(m.attribute) != attribute); + matches.extend_from_slice(&cropped_matches); + } + } } fn calculate_matches( @@ -496,13 +489,14 @@ fn calculate_matches( } fn calculate_highlights( - document: IndexMap, - matches: MatchesInfos, - attributes_to_highlight: HashSet, -) -> HighlightInfos { - let mut highlight_result: HashMap = HashMap::new(); + document: &IndexMap, + matches: &MatchesInfos, + attributes_to_highlight: &HashSet, +) -> IndexMap { + let mut highlight_result = IndexMap::new(); + for (attribute, matches) in matches.iter() { - if attributes_to_highlight.contains("*") || attributes_to_highlight.contains(attribute) { + if attributes_to_highlight.contains(attribute) { if let Some(Value::String(value)) = document.get(attribute) { let value: Vec<_> = value.chars().collect(); let mut highlighted_value = String::new(); @@ -527,6 +521,7 @@ fn calculate_highlights( }; } } + highlight_result } @@ -543,9 +538,10 @@ mod tests { let document: IndexMap = serde_json::from_str(data).unwrap(); let mut attributes_to_highlight = HashSet::new(); - attributes_to_highlight.insert("*".to_string()); + attributes_to_highlight.insert("title".to_string()); + attributes_to_highlight.insert("description".to_string()); - let mut matches: HashMap> = HashMap::new(); + let mut matches = HashMap::new(); let mut m = Vec::new(); m.push(MatchPosition { @@ -560,9 +556,9 @@ mod tests { length: 9, }); matches.insert("description".to_string(), m); - let result = super::calculate_highlights(document, matches, attributes_to_highlight); + let result = super::calculate_highlights(&document, &matches, &attributes_to_highlight); - let mut result_expected = HashMap::new(); + let mut result_expected = IndexMap::new(); result_expected.insert( "title".to_string(), Value::String("Fondation (Isaac ASIMOV)".to_string()), diff --git a/meilidb-http/src/routes/search.rs b/meilidb-http/src/routes/search.rs index 2a2b51a4c..116f76e03 100644 --- a/meilidb-http/src/routes/search.rs +++ b/meilidb-http/src/routes/search.rs @@ -36,6 +36,12 @@ pub async fn search_with_url_query(ctx: Context) -> SResult { let env = &ctx.state().db.env; let reader = env.read_txn().map_err(ResponseError::internal)?; + let schema = index + .main + .schema(&reader) + .map_err(ResponseError::internal)? + .ok_or(ResponseError::open_index("No Schema found"))?; + let query: SearchQuery = ctx .url_query() .map_err(|_| ResponseError::bad_request("invalid query parameter"))?; @@ -61,18 +67,31 @@ pub async fn search_with_url_query(ctx: Context) -> SResult { } if let Some(attributes_to_crop) = query.attributes_to_crop { let crop_length = query.crop_length.unwrap_or(200); - let attributes_to_crop = attributes_to_crop - .split(',') - .map(|r| (r.to_string(), crop_length)) - .collect(); - search_builder.attributes_to_crop(attributes_to_crop); + if attributes_to_crop == "*" { + let attributes_to_crop = schema + .iter() + .map(|(attr, ..)| (attr.to_string(), crop_length)) + .collect(); + search_builder.attributes_to_crop(attributes_to_crop); + } else { + let attributes_to_crop = attributes_to_crop + .split(',') + .map(|r| (r.to_string(), crop_length)) + .collect(); + search_builder.attributes_to_crop(attributes_to_crop); + } } if let Some(attributes_to_highlight) = query.attributes_to_highlight { - let attributes_to_highlight = attributes_to_highlight - .split(',') - .map(ToString::to_string) - .collect(); + let attributes_to_highlight = if attributes_to_highlight == "*" { + schema.iter().map(|(attr, ..)| attr.to_string()).collect() + } else { + attributes_to_highlight + .split(',') + .map(ToString::to_string) + .collect() + }; + search_builder.attributes_to_highlight(attributes_to_highlight); }