mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-29 08:14:26 +01:00
Merge #4028
4028: Fix highlighting bug when searching for a phrase with cropping r=ManyTheFish a=vivek-26 # Pull Request ## Related issue Fixes #3975 ## What does this PR do? This PR: - Fixes the bug where searching **only** for a phrase (containing multiple words) with cropping enabled highlighted only the first word of the phrase. - Adds a unit test case for the above-mentioned scenario. ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
This commit is contained in:
commit
462b4654c4
@ -418,19 +418,11 @@ impl<'t> Matcher<'t, '_> {
|
|||||||
} else {
|
} else {
|
||||||
match &self.matches {
|
match &self.matches {
|
||||||
Some((tokens, matches)) => {
|
Some((tokens, matches)) => {
|
||||||
// If the text has to be cropped,
|
|
||||||
// compute the best interval to crop around.
|
|
||||||
let matches = match format_options.crop {
|
|
||||||
Some(crop_size) if crop_size > 0 => {
|
|
||||||
self.find_best_match_interval(matches, crop_size)
|
|
||||||
}
|
|
||||||
_ => matches,
|
|
||||||
};
|
|
||||||
|
|
||||||
// If the text has to be cropped,
|
// If the text has to be cropped,
|
||||||
// crop around the best interval.
|
// crop around the best interval.
|
||||||
let (byte_start, byte_end) = match format_options.crop {
|
let (byte_start, byte_end) = match format_options.crop {
|
||||||
Some(crop_size) if crop_size > 0 => {
|
Some(crop_size) if crop_size > 0 => {
|
||||||
|
let matches = self.find_best_match_interval(matches, crop_size);
|
||||||
self.crop_bounds(tokens, matches, crop_size)
|
self.crop_bounds(tokens, matches, crop_size)
|
||||||
}
|
}
|
||||||
_ => (0, self.text.len()),
|
_ => (0, self.text.len()),
|
||||||
@ -450,6 +442,11 @@ impl<'t> Matcher<'t, '_> {
|
|||||||
for m in matches {
|
for m in matches {
|
||||||
let token = &tokens[m.token_position];
|
let token = &tokens[m.token_position];
|
||||||
|
|
||||||
|
// skip matches out of the crop window.
|
||||||
|
if token.byte_start < byte_start || token.byte_end > byte_end {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if byte_index < token.byte_start {
|
if byte_index < token.byte_start {
|
||||||
formatted.push(&self.text[byte_index..token.byte_start]);
|
formatted.push(&self.text[byte_index..token.byte_start]);
|
||||||
}
|
}
|
||||||
@ -800,6 +797,37 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn format_highlight_crop_phrase_query() {
|
||||||
|
//! testing: https://github.com/meilisearch/meilisearch/issues/3975
|
||||||
|
let temp_index = TempIndex::new();
|
||||||
|
temp_index
|
||||||
|
.add_documents(documents!([
|
||||||
|
{ "id": 1, "text": "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!" }
|
||||||
|
]))
|
||||||
|
.unwrap();
|
||||||
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let format_options = FormatOptions { highlight: true, crop: Some(10) };
|
||||||
|
let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
|
||||||
|
|
||||||
|
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
|
||||||
|
let mut matcher = builder.build(text);
|
||||||
|
// should return 10 words with a marker at the start as well as the end, and the highlighted matches.
|
||||||
|
insta::assert_snapshot!(
|
||||||
|
matcher.format(format_options),
|
||||||
|
@"…had the power to split <em>the</em> <em>world</em> between those who…"
|
||||||
|
);
|
||||||
|
|
||||||
|
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\"");
|
||||||
|
let mut matcher = builder.build(text);
|
||||||
|
// should highlight "those" and the phrase "and those".
|
||||||
|
insta::assert_snapshot!(
|
||||||
|
matcher.format(format_options),
|
||||||
|
@"…world between <em>those</em> who embraced progress <em>and</em> <em>those</em> who resisted…"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn smaller_crop_size() {
|
fn smaller_crop_size() {
|
||||||
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
|
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
|
||||||
|
Loading…
Reference in New Issue
Block a user