Improve the highlight formatted outputs

This commit is contained in:
Clément Renault 2019-11-15 12:04:46 +01:00
parent 9788779894
commit 521c96354f
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
3 changed files with 84 additions and 60 deletions

View File

@ -17,6 +17,7 @@ pub enum ResponseError {
DocumentNotFound(String), DocumentNotFound(String),
MissingHeader(String), MissingHeader(String),
BadParameter(String, String), BadParameter(String, String),
OpenIndex(String),
CreateIndex(String), CreateIndex(String),
Maintenance, Maintenance,
} }
@ -54,6 +55,10 @@ impl ResponseError {
ResponseError::BadParameter(name.to_string(), message.to_string()) ResponseError::BadParameter(name.to_string(), message.to_string())
} }
/// Builds a `ResponseError::OpenIndex` whose payload is `message` rendered to a `String`.
///
/// Accepts any `Display` value and converts it with `to_string()`.
pub fn open_index(message: impl Display) -> ResponseError {
ResponseError::OpenIndex(message.to_string())
}
pub fn create_index(message: impl Display) -> ResponseError { pub fn create_index(message: impl Display) -> ResponseError {
ResponseError::CreateIndex(message.to_string()) ResponseError::CreateIndex(message.to_string())
} }
@ -96,6 +101,10 @@ impl IntoResponse for ResponseError {
format!("Impossible to create index; {}", err), format!("Impossible to create index; {}", err),
StatusCode::BAD_REQUEST, StatusCode::BAD_REQUEST,
), ),
ResponseError::OpenIndex(err) => error(
format!("Impossible to open index; {}", err),
StatusCode::BAD_REQUEST,
),
ResponseError::Maintenance => error( ResponseError::Maintenance => error(
String::from("Server is in maintenance, please try again later"), String::from("Server is in maintenance, please try again later"),
StatusCode::SERVICE_UNAVAILABLE, StatusCode::SERVICE_UNAVAILABLE,

View File

@ -235,43 +235,35 @@ impl<'a> SearchBuilder<'a> {
} }
fields = Some(set); fields = Some(set);
} }
let mut document: IndexMap<String, Value> = self
let document: IndexMap<String, Value> = self
.index .index
.document(reader, fields.as_ref(), doc.id) .document(reader, fields.as_ref(), doc.id)
.map_err(|e| Error::RetrieveDocument(doc.id.0, e.to_string()))? .map_err(|e| Error::RetrieveDocument(doc.id.0, e.to_string()))?
.ok_or(Error::DocumentNotFound(doc.id.0))?; .ok_or(Error::DocumentNotFound(doc.id.0))?;
let mut formatted = document.clone();
let mut matches = doc.highlights.clone(); let mut matches = doc.highlights.clone();
// Crops fields if needed // Crops fields if needed
if let Some(fields) = self.attributes_to_crop.clone() { if let Some(fields) = &self.attributes_to_crop {
for (field, length) in fields { crop_document(&mut formatted, &mut matches, &schema, fields);
let _ = crop_document(&mut document, &mut matches, &schema, &field, length);
}
} }
// Transform to readable matches // Transform to readable matches
let matches = calculate_matches(matches, self.attributes_to_retrieve.clone(), &schema); let matches = calculate_matches(matches, self.attributes_to_retrieve.clone(), &schema);
if !self.matches { if !self.matches {
if let Some(attributes_to_highlight) = self.attributes_to_highlight.clone() { if let Some(attributes_to_highlight) = &self.attributes_to_highlight {
let highlights = calculate_highlights( formatted = calculate_highlights(&formatted, &matches, attributes_to_highlight);
document.clone(),
matches.clone(),
attributes_to_highlight,
);
for (key, value) in highlights {
if let Some(content) = document.get_mut(&key) {
*content = value;
}
}
} }
} }
let matches_info = if self.matches { Some(matches) } else { None }; let matches_info = if self.matches { Some(matches) } else { None };
let hit = SearchHit { let hit = SearchHit {
hit: document, document,
formatted,
matches_info, matches_info,
}; };
@ -388,7 +380,9 @@ pub type MatchesInfos = HashMap<String, Vec<MatchPosition>>;
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchHit { pub struct SearchHit {
#[serde(flatten)] #[serde(flatten)]
pub hit: IndexMap<String, Value>, pub document: IndexMap<String, Value>,
#[serde(rename = "_formatted", skip_serializing_if = "IndexMap::is_empty")]
pub formatted: IndexMap<String, Value>,
#[serde(rename = "_matchesInfo", skip_serializing_if = "Option::is_none")] #[serde(rename = "_matchesInfo", skip_serializing_if = "Option::is_none")]
pub matches_info: Option<MatchesInfos>, pub matches_info: Option<MatchesInfos>,
} }
@ -431,32 +425,31 @@ fn crop_document(
document: &mut IndexMap<String, Value>, document: &mut IndexMap<String, Value>,
matches: &mut Vec<Highlight>, matches: &mut Vec<Highlight>,
schema: &Schema, schema: &Schema,
field: &str, fields: &HashMap<String, usize>,
length: usize, ) {
) -> Result<(), Error> {
matches.sort_unstable_by_key(|m| (m.char_index, m.char_length)); matches.sort_unstable_by_key(|m| (m.char_index, m.char_length));
let attribute = schema for (field, length) in fields {
.attribute(field) let attribute = match schema.attribute(field) {
.ok_or(Error::AttributeNotFoundOnSchema(field.to_string()))?; Some(attribute) => attribute,
None => continue,
};
let selected_matches = matches let selected_matches = matches
.iter() .iter()
.filter(|m| SchemaAttr::new(m.attribute) == attribute) .filter(|m| SchemaAttr::new(m.attribute) == attribute)
.cloned(); .cloned();
let original_text = match document.get(field) {
Some(Value::String(text)) => text,
Some(_) => return Err(Error::CropFieldWrongType(field.to_string())),
None => return Err(Error::AttributeNotFoundOnDocument(field.to_string())),
};
let (cropped_text, cropped_matches) = crop_text(&original_text, selected_matches, length);
document.insert( if let Some(Value::String(ref mut original_text)) = document.get_mut(field) {
field.to_string(), let (cropped_text, cropped_matches) =
serde_json::value::Value::String(cropped_text), crop_text(original_text, selected_matches, *length);
);
*original_text = cropped_text;
matches.retain(|m| SchemaAttr::new(m.attribute) != attribute); matches.retain(|m| SchemaAttr::new(m.attribute) != attribute);
matches.extend_from_slice(&cropped_matches); matches.extend_from_slice(&cropped_matches);
Ok(()) }
}
} }
fn calculate_matches( fn calculate_matches(
@ -496,13 +489,14 @@ fn calculate_matches(
} }
fn calculate_highlights( fn calculate_highlights(
document: IndexMap<String, Value>, document: &IndexMap<String, Value>,
matches: MatchesInfos, matches: &MatchesInfos,
attributes_to_highlight: HashSet<String>, attributes_to_highlight: &HashSet<String>,
) -> HighlightInfos { ) -> IndexMap<String, Value> {
let mut highlight_result: HashMap<String, Value> = HashMap::new(); let mut highlight_result = IndexMap::new();
for (attribute, matches) in matches.iter() { for (attribute, matches) in matches.iter() {
if attributes_to_highlight.contains("*") || attributes_to_highlight.contains(attribute) { if attributes_to_highlight.contains(attribute) {
if let Some(Value::String(value)) = document.get(attribute) { if let Some(Value::String(value)) = document.get(attribute) {
let value: Vec<_> = value.chars().collect(); let value: Vec<_> = value.chars().collect();
let mut highlighted_value = String::new(); let mut highlighted_value = String::new();
@ -527,6 +521,7 @@ fn calculate_highlights(
}; };
} }
} }
highlight_result highlight_result
} }
@ -543,9 +538,10 @@ mod tests {
let document: IndexMap<String, Value> = serde_json::from_str(data).unwrap(); let document: IndexMap<String, Value> = serde_json::from_str(data).unwrap();
let mut attributes_to_highlight = HashSet::new(); let mut attributes_to_highlight = HashSet::new();
attributes_to_highlight.insert("*".to_string()); attributes_to_highlight.insert("title".to_string());
attributes_to_highlight.insert("description".to_string());
let mut matches: HashMap<String, Vec<MatchPosition>> = HashMap::new(); let mut matches = HashMap::new();
let mut m = Vec::new(); let mut m = Vec::new();
m.push(MatchPosition { m.push(MatchPosition {
@ -560,9 +556,9 @@ mod tests {
length: 9, length: 9,
}); });
matches.insert("description".to_string(), m); matches.insert("description".to_string(), m);
let result = super::calculate_highlights(document, matches, attributes_to_highlight); let result = super::calculate_highlights(&document, &matches, &attributes_to_highlight);
let mut result_expected = HashMap::new(); let mut result_expected = IndexMap::new();
result_expected.insert( result_expected.insert(
"title".to_string(), "title".to_string(),
Value::String("<em>Fondation</em> (Isaac ASIMOV)".to_string()), Value::String("<em>Fondation</em> (Isaac ASIMOV)".to_string()),

View File

@ -36,6 +36,12 @@ pub async fn search_with_url_query(ctx: Context<Data>) -> SResult<Response> {
let env = &ctx.state().db.env; let env = &ctx.state().db.env;
let reader = env.read_txn().map_err(ResponseError::internal)?; let reader = env.read_txn().map_err(ResponseError::internal)?;
let schema = index
.main
.schema(&reader)
.map_err(ResponseError::internal)?
.ok_or(ResponseError::open_index("No Schema found"))?;
let query: SearchQuery = ctx let query: SearchQuery = ctx
.url_query() .url_query()
.map_err(|_| ResponseError::bad_request("invalid query parameter"))?; .map_err(|_| ResponseError::bad_request("invalid query parameter"))?;
@ -61,18 +67,31 @@ pub async fn search_with_url_query(ctx: Context<Data>) -> SResult<Response> {
} }
if let Some(attributes_to_crop) = query.attributes_to_crop { if let Some(attributes_to_crop) = query.attributes_to_crop {
let crop_length = query.crop_length.unwrap_or(200); let crop_length = query.crop_length.unwrap_or(200);
if attributes_to_crop == "*" {
let attributes_to_crop = schema
.iter()
.map(|(attr, ..)| (attr.to_string(), crop_length))
.collect();
search_builder.attributes_to_crop(attributes_to_crop);
} else {
let attributes_to_crop = attributes_to_crop let attributes_to_crop = attributes_to_crop
.split(',') .split(',')
.map(|r| (r.to_string(), crop_length)) .map(|r| (r.to_string(), crop_length))
.collect(); .collect();
search_builder.attributes_to_crop(attributes_to_crop); search_builder.attributes_to_crop(attributes_to_crop);
} }
}
if let Some(attributes_to_highlight) = query.attributes_to_highlight { if let Some(attributes_to_highlight) = query.attributes_to_highlight {
let attributes_to_highlight = attributes_to_highlight let attributes_to_highlight = if attributes_to_highlight == "*" {
schema.iter().map(|(attr, ..)| attr.to_string()).collect()
} else {
attributes_to_highlight
.split(',') .split(',')
.map(ToString::to_string) .map(ToString::to_string)
.collect(); .collect()
};
search_builder.attributes_to_highlight(attributes_to_highlight); search_builder.attributes_to_highlight(attributes_to_highlight);
} }