mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-25 22:34:28 +01:00
Merge pull request #679 from MarinPostma/highlight-align-fix
Highlight align fix
This commit is contained in:
commit
e2b71b0e57
@ -1,6 +1,7 @@
|
|||||||
## v0.10.2
|
## v0.10.2
|
||||||
|
|
||||||
- Disable sentry in debug (#681)
|
- Disable sentry in debug (#681)
|
||||||
|
- Fix highlight misalignment (#679)
|
||||||
- Add support for facet count (#676)
|
- Add support for facet count (#676)
|
||||||
- Add support for faceted search (#631)
|
- Add support for faceted search (#631)
|
||||||
- Add support for configuring the lmdb map size (#646, #647)
|
- Add support for configuring the lmdb map size (#646, #647)
|
||||||
|
@ -214,7 +214,7 @@ impl<'a> SearchBuilder<'a> {
|
|||||||
// Transform to readable matches
|
// Transform to readable matches
|
||||||
if let Some(attributes_to_highlight) = &self.attributes_to_highlight {
|
if let Some(attributes_to_highlight) = &self.attributes_to_highlight {
|
||||||
let matches = calculate_matches(
|
let matches = calculate_matches(
|
||||||
matches.clone(),
|
&matches,
|
||||||
self.attributes_to_highlight.clone(),
|
self.attributes_to_highlight.clone(),
|
||||||
&schema,
|
&schema,
|
||||||
);
|
);
|
||||||
@ -222,7 +222,7 @@ impl<'a> SearchBuilder<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let matches_info = if self.matches {
|
let matches_info = if self.matches {
|
||||||
Some(calculate_matches(matches, self.attributes_to_retrieve.clone(), &schema))
|
Some(calculate_matches(&matches, self.attributes_to_retrieve.clone(), &schema))
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
@ -352,10 +352,14 @@ fn aligned_crop(text: &str, match_index: usize, context: usize) -> (usize, usize
|
|||||||
return (match_index, 1 + text.chars().skip(match_index).take_while(is_word_component).count());
|
return (match_index, 1 + text.chars().skip(match_index).take_while(is_word_component).count());
|
||||||
}
|
}
|
||||||
let start = match match_index.saturating_sub(context) {
|
let start = match match_index.saturating_sub(context) {
|
||||||
n if n == 0 => n,
|
0 => 0,
|
||||||
n => word_end_index(n)
|
n => {
|
||||||
|
let word_end_index = word_end_index(n);
|
||||||
|
// skip whitespaces if any
|
||||||
|
word_end_index + text.chars().skip(word_end_index).take_while(char::is_ascii_whitespace).count()
|
||||||
|
}
|
||||||
};
|
};
|
||||||
let end = word_end_index(start + 2 * context);
|
let end = word_end_index(match_index + context);
|
||||||
|
|
||||||
(start, end - start)
|
(start, end - start)
|
||||||
}
|
}
|
||||||
@ -370,15 +374,21 @@ fn crop_text(
|
|||||||
let char_index = matches.peek().map(|m| m.char_index as usize).unwrap_or(0);
|
let char_index = matches.peek().map(|m| m.char_index as usize).unwrap_or(0);
|
||||||
let (start, count) = aligned_crop(text, char_index, context);
|
let (start, count) = aligned_crop(text, char_index, context);
|
||||||
|
|
||||||
//TODO do something about the double allocation
|
// TODO do something about double allocation
|
||||||
let text = text.chars().skip(start).take(count).collect::<String>().trim().to_string();
|
let text = text
|
||||||
|
.chars()
|
||||||
|
.skip(start)
|
||||||
|
.take(count)
|
||||||
|
.collect::<String>()
|
||||||
|
.trim()
|
||||||
|
.to_string();
|
||||||
|
|
||||||
// update matches index to match the new cropped text
|
// update matches index to match the new cropped text
|
||||||
let matches = matches
|
let matches = matches
|
||||||
.take_while(|m| (m.char_index as usize) + (m.char_length as usize) <= start + (context * 2))
|
.take_while(|m| (m.char_index as usize) + (m.char_length as usize) <= start + count)
|
||||||
.map(|match_| Highlight {
|
.map(|m| Highlight {
|
||||||
char_index: match_.char_index - start as u16,
|
char_index: m.char_index - start as u16,
|
||||||
..match_
|
..m
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
@ -417,14 +427,14 @@ fn crop_document(
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn calculate_matches(
|
fn calculate_matches(
|
||||||
matches: Vec<Highlight>,
|
matches: &[Highlight],
|
||||||
attributes_to_retrieve: Option<HashSet<String>>,
|
attributes_to_retrieve: Option<HashSet<String>>,
|
||||||
schema: &Schema,
|
schema: &Schema,
|
||||||
) -> MatchesInfos {
|
) -> MatchesInfos {
|
||||||
let mut matches_result: HashMap<String, Vec<MatchPosition>> = HashMap::new();
|
let mut matches_result: HashMap<String, Vec<MatchPosition>> = HashMap::new();
|
||||||
for m in matches.iter() {
|
for m in matches.iter() {
|
||||||
if let Some(attribute) = schema.name(FieldId::new(m.attribute)) {
|
if let Some(attribute) = schema.name(FieldId::new(m.attribute)) {
|
||||||
if let Some(attributes_to_retrieve) = attributes_to_retrieve.clone() {
|
if let Some(ref attributes_to_retrieve) = attributes_to_retrieve {
|
||||||
if !attributes_to_retrieve.contains(attribute) {
|
if !attributes_to_retrieve.contains(attribute) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -470,21 +480,20 @@ fn calculate_highlights(
|
|||||||
|
|
||||||
let longest_matches = matches
|
let longest_matches = matches
|
||||||
.linear_group_by_key(|m| m.start)
|
.linear_group_by_key(|m| m.start)
|
||||||
.map(|group| group.last().unwrap());
|
.map(|group| group.last().unwrap())
|
||||||
|
.filter(move |m| m.start >= index);
|
||||||
|
|
||||||
for m in longest_matches {
|
for m in longest_matches {
|
||||||
if m.start >= index {
|
let before = value.get(index..m.start);
|
||||||
let before = value.get(index..m.start);
|
let highlighted = value.get(m.start..(m.start + m.length));
|
||||||
let highlighted = value.get(m.start..(m.start + m.length));
|
if let (Some(before), Some(highlighted)) = (before, highlighted) {
|
||||||
if let (Some(before), Some(highlighted)) = (before, highlighted) {
|
highlighted_value.extend(before);
|
||||||
highlighted_value.extend(before);
|
highlighted_value.push_str("<em>");
|
||||||
highlighted_value.push_str("<em>");
|
highlighted_value.extend(highlighted);
|
||||||
highlighted_value.extend(highlighted);
|
highlighted_value.push_str("</em>");
|
||||||
highlighted_value.push_str("</em>");
|
index = m.start + m.length;
|
||||||
index = m.start + m.length;
|
} else {
|
||||||
} else {
|
error!("value: {:?}; index: {:?}, match: {:?}", value, index, m);
|
||||||
error!("value: {:?}; index: {:?}, match: {:?}", value, index, m);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
highlighted_value.extend(value[index..].iter());
|
highlighted_value.extend(value[index..].iter());
|
||||||
@ -492,7 +501,6 @@ fn calculate_highlights(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
highlight_result
|
highlight_result
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -524,13 +532,12 @@ mod tests {
|
|||||||
// mixed charset
|
// mixed charset
|
||||||
let (start, length) = aligned_crop(&text, 5, 3);
|
let (start, length) = aligned_crop(&text, 5, 3);
|
||||||
let cropped = text.chars().skip(start).take(length).collect::<String>().trim().to_string();
|
let cropped = text.chars().skip(start).take(length).collect::<String>().trim().to_string();
|
||||||
assert_eq!("isのス", cropped);
|
assert_eq!("isの", cropped);
|
||||||
|
|
||||||
// split regular word / CJK word, no space
|
// split regular word / CJK word, no space
|
||||||
let (start, length) = aligned_crop(&text, 7, 1);
|
let (start, length) = aligned_crop(&text, 7, 1);
|
||||||
let cropped = text.chars().skip(start).take(length).collect::<String>().trim().to_string();
|
let cropped = text.chars().skip(start).take(length).collect::<String>().trim().to_string();
|
||||||
assert_eq!("のス", cropped);
|
assert_eq!("の", cropped);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -544,7 +551,7 @@ mod tests {
|
|||||||
|
|
||||||
let schema = Schema::with_primary_key("title");
|
let schema = Schema::with_primary_key("title");
|
||||||
|
|
||||||
let matches_result = super::calculate_matches(matches, Some(attributes_to_retrieve), &schema);
|
let matches_result = super::calculate_matches(&matches, Some(attributes_to_retrieve), &schema);
|
||||||
|
|
||||||
let mut matches_result_expected: HashMap<String, Vec<MatchPosition>> = HashMap::new();
|
let mut matches_result_expected: HashMap<String, Vec<MatchPosition>> = HashMap::new();
|
||||||
|
|
||||||
|
@ -1343,3 +1343,67 @@ async fn test_facet_count() {
|
|||||||
let (_response, status_code) = server.search(query).await;
|
let (_response, status_code) = server.search(query).await;
|
||||||
assert_eq!(status_code, 400);
|
assert_eq!(status_code, 400);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn highlight_cropped_text() {
|
||||||
|
let mut server = common::Server::with_uid("test");
|
||||||
|
|
||||||
|
let body = json!({
|
||||||
|
"uid": "test",
|
||||||
|
"primaryKey": "id",
|
||||||
|
});
|
||||||
|
server.create_index(body).await;
|
||||||
|
|
||||||
|
let doc = json!([
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"body": r##"well, it may not work like that, try the following:
|
||||||
|
1. insert your trip
|
||||||
|
2. google your `searchQuery`
|
||||||
|
3. find a solution
|
||||||
|
> say hello"##
|
||||||
|
}
|
||||||
|
]);
|
||||||
|
server.add_or_replace_multiple_documents(doc).await;
|
||||||
|
|
||||||
|
// tests from #680
|
||||||
|
let query = "q=insert&attributesToHighlight=*&attributesToCrop=body&cropLength=30";
|
||||||
|
let (response, _status_code) = server.search(query).await;
|
||||||
|
let expected_response = "that, try the following: \n1. <em>insert</em> your trip\n2. google your";
|
||||||
|
assert_eq!(response
|
||||||
|
.get("hits")
|
||||||
|
.unwrap()
|
||||||
|
.as_array()
|
||||||
|
.unwrap()
|
||||||
|
.get(0)
|
||||||
|
.unwrap()
|
||||||
|
.as_object()
|
||||||
|
.unwrap()
|
||||||
|
.get("_formatted")
|
||||||
|
.unwrap()
|
||||||
|
.as_object()
|
||||||
|
.unwrap()
|
||||||
|
.get("body")
|
||||||
|
.unwrap()
|
||||||
|
, &Value::String(expected_response.to_owned()));
|
||||||
|
|
||||||
|
let query = "q=insert&attributesToHighlight=*&attributesToCrop=body&cropLength=80";
|
||||||
|
let (response, _status_code) = server.search(query).await;
|
||||||
|
let expected_response = "well, it may not work like that, try the following: \n1. <em>insert</em> your trip\n2. google your `searchQuery`\n3. find a solution \n> say hello";
|
||||||
|
assert_eq!(response
|
||||||
|
.get("hits")
|
||||||
|
.unwrap()
|
||||||
|
.as_array()
|
||||||
|
.unwrap()
|
||||||
|
.get(0)
|
||||||
|
.unwrap()
|
||||||
|
.as_object()
|
||||||
|
.unwrap()
|
||||||
|
.get("_formatted")
|
||||||
|
.unwrap()
|
||||||
|
.as_object()
|
||||||
|
.unwrap()
|
||||||
|
.get("body")
|
||||||
|
.unwrap()
|
||||||
|
, &Value::String(expected_response.to_owned()));
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user