mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
Merge pull request #297 from meilisearch/improve-highlights
Improve the highlight formatted outputs
This commit is contained in:
commit
fc8c7ed77e
@ -17,6 +17,7 @@ pub enum ResponseError {
|
|||||||
DocumentNotFound(String),
|
DocumentNotFound(String),
|
||||||
MissingHeader(String),
|
MissingHeader(String),
|
||||||
BadParameter(String, String),
|
BadParameter(String, String),
|
||||||
|
OpenIndex(String),
|
||||||
CreateIndex(String),
|
CreateIndex(String),
|
||||||
Maintenance,
|
Maintenance,
|
||||||
}
|
}
|
||||||
@ -54,6 +55,10 @@ impl ResponseError {
|
|||||||
ResponseError::BadParameter(name.to_string(), message.to_string())
|
ResponseError::BadParameter(name.to_string(), message.to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn open_index(message: impl Display) -> ResponseError {
|
||||||
|
ResponseError::OpenIndex(message.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
pub fn create_index(message: impl Display) -> ResponseError {
|
pub fn create_index(message: impl Display) -> ResponseError {
|
||||||
ResponseError::CreateIndex(message.to_string())
|
ResponseError::CreateIndex(message.to_string())
|
||||||
}
|
}
|
||||||
@ -96,6 +101,10 @@ impl IntoResponse for ResponseError {
|
|||||||
format!("Impossible to create index; {}", err),
|
format!("Impossible to create index; {}", err),
|
||||||
StatusCode::BAD_REQUEST,
|
StatusCode::BAD_REQUEST,
|
||||||
),
|
),
|
||||||
|
ResponseError::OpenIndex(err) => error(
|
||||||
|
format!("Impossible to open index; {}", err),
|
||||||
|
StatusCode::BAD_REQUEST,
|
||||||
|
),
|
||||||
ResponseError::Maintenance => error(
|
ResponseError::Maintenance => error(
|
||||||
String::from("Server is in maintenance, please try again later"),
|
String::from("Server is in maintenance, please try again later"),
|
||||||
StatusCode::SERVICE_UNAVAILABLE,
|
StatusCode::SERVICE_UNAVAILABLE,
|
||||||
|
@ -235,43 +235,35 @@ impl<'a> SearchBuilder<'a> {
|
|||||||
}
|
}
|
||||||
fields = Some(set);
|
fields = Some(set);
|
||||||
}
|
}
|
||||||
let mut document: IndexMap<String, Value> = self
|
|
||||||
|
let document: IndexMap<String, Value> = self
|
||||||
.index
|
.index
|
||||||
.document(reader, fields.as_ref(), doc.id)
|
.document(reader, fields.as_ref(), doc.id)
|
||||||
.map_err(|e| Error::RetrieveDocument(doc.id.0, e.to_string()))?
|
.map_err(|e| Error::RetrieveDocument(doc.id.0, e.to_string()))?
|
||||||
.ok_or(Error::DocumentNotFound(doc.id.0))?;
|
.ok_or(Error::DocumentNotFound(doc.id.0))?;
|
||||||
|
|
||||||
|
let mut formatted = document.clone();
|
||||||
let mut matches = doc.highlights.clone();
|
let mut matches = doc.highlights.clone();
|
||||||
|
|
||||||
// Crops fields if needed
|
// Crops fields if needed
|
||||||
if let Some(fields) = self.attributes_to_crop.clone() {
|
if let Some(fields) = &self.attributes_to_crop {
|
||||||
for (field, length) in fields {
|
crop_document(&mut formatted, &mut matches, &schema, fields);
|
||||||
let _ = crop_document(&mut document, &mut matches, &schema, &field, length);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Transform to readable matches
|
// Transform to readable matches
|
||||||
let matches = calculate_matches(matches, self.attributes_to_retrieve.clone(), &schema);
|
let matches = calculate_matches(matches, self.attributes_to_retrieve.clone(), &schema);
|
||||||
|
|
||||||
if !self.matches {
|
if !self.matches {
|
||||||
if let Some(attributes_to_highlight) = self.attributes_to_highlight.clone() {
|
if let Some(attributes_to_highlight) = &self.attributes_to_highlight {
|
||||||
let highlights = calculate_highlights(
|
formatted = calculate_highlights(&formatted, &matches, attributes_to_highlight);
|
||||||
document.clone(),
|
|
||||||
matches.clone(),
|
|
||||||
attributes_to_highlight,
|
|
||||||
);
|
|
||||||
for (key, value) in highlights {
|
|
||||||
if let Some(content) = document.get_mut(&key) {
|
|
||||||
*content = value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let matches_info = if self.matches { Some(matches) } else { None };
|
let matches_info = if self.matches { Some(matches) } else { None };
|
||||||
|
|
||||||
let hit = SearchHit {
|
let hit = SearchHit {
|
||||||
hit: document,
|
document,
|
||||||
|
formatted,
|
||||||
matches_info,
|
matches_info,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -388,7 +380,9 @@ pub type MatchesInfos = HashMap<String, Vec<MatchPosition>>;
|
|||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct SearchHit {
|
pub struct SearchHit {
|
||||||
#[serde(flatten)]
|
#[serde(flatten)]
|
||||||
pub hit: IndexMap<String, Value>,
|
pub document: IndexMap<String, Value>,
|
||||||
|
#[serde(rename = "_formatted", skip_serializing_if = "IndexMap::is_empty")]
|
||||||
|
pub formatted: IndexMap<String, Value>,
|
||||||
#[serde(rename = "_matchesInfo", skip_serializing_if = "Option::is_none")]
|
#[serde(rename = "_matchesInfo", skip_serializing_if = "Option::is_none")]
|
||||||
pub matches_info: Option<MatchesInfos>,
|
pub matches_info: Option<MatchesInfos>,
|
||||||
}
|
}
|
||||||
@ -431,32 +425,31 @@ fn crop_document(
|
|||||||
document: &mut IndexMap<String, Value>,
|
document: &mut IndexMap<String, Value>,
|
||||||
matches: &mut Vec<Highlight>,
|
matches: &mut Vec<Highlight>,
|
||||||
schema: &Schema,
|
schema: &Schema,
|
||||||
field: &str,
|
fields: &HashMap<String, usize>,
|
||||||
length: usize,
|
) {
|
||||||
) -> Result<(), Error> {
|
|
||||||
matches.sort_unstable_by_key(|m| (m.char_index, m.char_length));
|
matches.sort_unstable_by_key(|m| (m.char_index, m.char_length));
|
||||||
|
|
||||||
let attribute = schema
|
for (field, length) in fields {
|
||||||
.attribute(field)
|
let attribute = match schema.attribute(field) {
|
||||||
.ok_or(Error::AttributeNotFoundOnSchema(field.to_string()))?;
|
Some(attribute) => attribute,
|
||||||
let selected_matches = matches
|
None => continue,
|
||||||
.iter()
|
};
|
||||||
.filter(|m| SchemaAttr::new(m.attribute) == attribute)
|
|
||||||
.cloned();
|
|
||||||
let original_text = match document.get(field) {
|
|
||||||
Some(Value::String(text)) => text,
|
|
||||||
Some(_) => return Err(Error::CropFieldWrongType(field.to_string())),
|
|
||||||
None => return Err(Error::AttributeNotFoundOnDocument(field.to_string())),
|
|
||||||
};
|
|
||||||
let (cropped_text, cropped_matches) = crop_text(&original_text, selected_matches, length);
|
|
||||||
|
|
||||||
document.insert(
|
let selected_matches = matches
|
||||||
field.to_string(),
|
.iter()
|
||||||
serde_json::value::Value::String(cropped_text),
|
.filter(|m| SchemaAttr::new(m.attribute) == attribute)
|
||||||
);
|
.cloned();
|
||||||
matches.retain(|m| SchemaAttr::new(m.attribute) != attribute);
|
|
||||||
matches.extend_from_slice(&cropped_matches);
|
if let Some(Value::String(ref mut original_text)) = document.get_mut(field) {
|
||||||
Ok(())
|
let (cropped_text, cropped_matches) =
|
||||||
|
crop_text(original_text, selected_matches, *length);
|
||||||
|
|
||||||
|
*original_text = cropped_text;
|
||||||
|
|
||||||
|
matches.retain(|m| SchemaAttr::new(m.attribute) != attribute);
|
||||||
|
matches.extend_from_slice(&cropped_matches);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn calculate_matches(
|
fn calculate_matches(
|
||||||
@ -496,13 +489,14 @@ fn calculate_matches(
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn calculate_highlights(
|
fn calculate_highlights(
|
||||||
document: IndexMap<String, Value>,
|
document: &IndexMap<String, Value>,
|
||||||
matches: MatchesInfos,
|
matches: &MatchesInfos,
|
||||||
attributes_to_highlight: HashSet<String>,
|
attributes_to_highlight: &HashSet<String>,
|
||||||
) -> HighlightInfos {
|
) -> IndexMap<String, Value> {
|
||||||
let mut highlight_result: HashMap<String, Value> = HashMap::new();
|
let mut highlight_result = IndexMap::new();
|
||||||
|
|
||||||
for (attribute, matches) in matches.iter() {
|
for (attribute, matches) in matches.iter() {
|
||||||
if attributes_to_highlight.contains("*") || attributes_to_highlight.contains(attribute) {
|
if attributes_to_highlight.contains(attribute) {
|
||||||
if let Some(Value::String(value)) = document.get(attribute) {
|
if let Some(Value::String(value)) = document.get(attribute) {
|
||||||
let value: Vec<_> = value.chars().collect();
|
let value: Vec<_> = value.chars().collect();
|
||||||
let mut highlighted_value = String::new();
|
let mut highlighted_value = String::new();
|
||||||
@ -527,6 +521,7 @@ fn calculate_highlights(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
highlight_result
|
highlight_result
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -543,9 +538,10 @@ mod tests {
|
|||||||
|
|
||||||
let document: IndexMap<String, Value> = serde_json::from_str(data).unwrap();
|
let document: IndexMap<String, Value> = serde_json::from_str(data).unwrap();
|
||||||
let mut attributes_to_highlight = HashSet::new();
|
let mut attributes_to_highlight = HashSet::new();
|
||||||
attributes_to_highlight.insert("*".to_string());
|
attributes_to_highlight.insert("title".to_string());
|
||||||
|
attributes_to_highlight.insert("description".to_string());
|
||||||
|
|
||||||
let mut matches: HashMap<String, Vec<MatchPosition>> = HashMap::new();
|
let mut matches = HashMap::new();
|
||||||
|
|
||||||
let mut m = Vec::new();
|
let mut m = Vec::new();
|
||||||
m.push(MatchPosition {
|
m.push(MatchPosition {
|
||||||
@ -560,9 +556,9 @@ mod tests {
|
|||||||
length: 9,
|
length: 9,
|
||||||
});
|
});
|
||||||
matches.insert("description".to_string(), m);
|
matches.insert("description".to_string(), m);
|
||||||
let result = super::calculate_highlights(document, matches, attributes_to_highlight);
|
let result = super::calculate_highlights(&document, &matches, &attributes_to_highlight);
|
||||||
|
|
||||||
let mut result_expected = HashMap::new();
|
let mut result_expected = IndexMap::new();
|
||||||
result_expected.insert(
|
result_expected.insert(
|
||||||
"title".to_string(),
|
"title".to_string(),
|
||||||
Value::String("<em>Fondation</em> (Isaac ASIMOV)".to_string()),
|
Value::String("<em>Fondation</em> (Isaac ASIMOV)".to_string()),
|
||||||
|
@ -36,6 +36,12 @@ pub async fn search_with_url_query(ctx: Context<Data>) -> SResult<Response> {
|
|||||||
let env = &ctx.state().db.env;
|
let env = &ctx.state().db.env;
|
||||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||||
|
|
||||||
|
let schema = index
|
||||||
|
.main
|
||||||
|
.schema(&reader)
|
||||||
|
.map_err(ResponseError::internal)?
|
||||||
|
.ok_or(ResponseError::open_index("No Schema found"))?;
|
||||||
|
|
||||||
let query: SearchQuery = ctx
|
let query: SearchQuery = ctx
|
||||||
.url_query()
|
.url_query()
|
||||||
.map_err(|_| ResponseError::bad_request("invalid query parameter"))?;
|
.map_err(|_| ResponseError::bad_request("invalid query parameter"))?;
|
||||||
@ -61,18 +67,31 @@ pub async fn search_with_url_query(ctx: Context<Data>) -> SResult<Response> {
|
|||||||
}
|
}
|
||||||
if let Some(attributes_to_crop) = query.attributes_to_crop {
|
if let Some(attributes_to_crop) = query.attributes_to_crop {
|
||||||
let crop_length = query.crop_length.unwrap_or(200);
|
let crop_length = query.crop_length.unwrap_or(200);
|
||||||
let attributes_to_crop = attributes_to_crop
|
if attributes_to_crop == "*" {
|
||||||
.split(',')
|
let attributes_to_crop = schema
|
||||||
.map(|r| (r.to_string(), crop_length))
|
.iter()
|
||||||
.collect();
|
.map(|(attr, ..)| (attr.to_string(), crop_length))
|
||||||
search_builder.attributes_to_crop(attributes_to_crop);
|
.collect();
|
||||||
|
search_builder.attributes_to_crop(attributes_to_crop);
|
||||||
|
} else {
|
||||||
|
let attributes_to_crop = attributes_to_crop
|
||||||
|
.split(',')
|
||||||
|
.map(|r| (r.to_string(), crop_length))
|
||||||
|
.collect();
|
||||||
|
search_builder.attributes_to_crop(attributes_to_crop);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(attributes_to_highlight) = query.attributes_to_highlight {
|
if let Some(attributes_to_highlight) = query.attributes_to_highlight {
|
||||||
let attributes_to_highlight = attributes_to_highlight
|
let attributes_to_highlight = if attributes_to_highlight == "*" {
|
||||||
.split(',')
|
schema.iter().map(|(attr, ..)| attr.to_string()).collect()
|
||||||
.map(ToString::to_string)
|
} else {
|
||||||
.collect();
|
attributes_to_highlight
|
||||||
|
.split(',')
|
||||||
|
.map(ToString::to_string)
|
||||||
|
.collect()
|
||||||
|
};
|
||||||
|
|
||||||
search_builder.attributes_to_highlight(attributes_to_highlight);
|
search_builder.attributes_to_highlight(attributes_to_highlight);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user