mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 12:54:26 +01:00
Merge #3726
3726: Fix prefix highlighting r=loiclec a=ManyTheFish The prefix queries were not properly highlighted, this PR now highlights only the start of a word when it matched with a prefix Co-authored-by: ManyTheFish <many@meilisearch.com> Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
This commit is contained in:
commit
eace6df91b
@ -52,7 +52,7 @@ impl MatchingWords {
|
|||||||
words.push(LocatedMatchingWords {
|
words.push(LocatedMatchingWords {
|
||||||
value: matching_words,
|
value: matching_words,
|
||||||
positions: located_term.positions.clone(),
|
positions: located_term.positions.clone(),
|
||||||
is_prefix: term.is_cached_prefix(),
|
is_prefix: term.is_prefix(),
|
||||||
original_char_count: term.original_word(&ctx).chars().count(),
|
original_char_count: term.original_word(&ctx).chars().count(),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -244,6 +244,8 @@ pub(crate) mod tests {
|
|||||||
temp_index
|
temp_index
|
||||||
.add_documents(documents!([
|
.add_documents(documents!([
|
||||||
{ "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
|
{ "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
|
||||||
|
{ "id": 2, "name": "Westfália" },
|
||||||
|
{ "id": 3, "name": "Ŵôřlḑôle" },
|
||||||
]))
|
]))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
temp_index
|
temp_index
|
||||||
@ -305,7 +307,7 @@ pub(crate) mod tests {
|
|||||||
..Default::default()
|
..Default::default()
|
||||||
})
|
})
|
||||||
.next(),
|
.next(),
|
||||||
None
|
Some(MatchType::Full { char_len: 5, ids: &(2..=2) })
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
matching_words
|
matching_words
|
||||||
|
@ -499,17 +499,36 @@ mod tests {
|
|||||||
use charabia::TokenizerBuilder;
|
use charabia::TokenizerBuilder;
|
||||||
use matching_words::tests::temp_index_with_documents;
|
use matching_words::tests::temp_index_with_documents;
|
||||||
|
|
||||||
use super::super::located_query_terms_from_tokens;
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::SearchContext;
|
use crate::index::tests::TempIndex;
|
||||||
|
use crate::{execute_search, SearchContext};
|
||||||
|
|
||||||
impl<'a> MatcherBuilder<'a, &[u8]> {
|
impl<'a> MatcherBuilder<'a, &[u8]> {
|
||||||
pub fn new_test(mut ctx: SearchContext, query: &'a str) -> Self {
|
fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
|
||||||
let tokenizer = TokenizerBuilder::new().build();
|
let mut ctx = SearchContext::new(index, rtxn);
|
||||||
let tokens = tokenizer.tokenize(query);
|
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
|
||||||
let query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap();
|
&mut ctx,
|
||||||
let matching_words = MatchingWords::new(ctx, query_terms);
|
&Some(query.to_string()),
|
||||||
Self::new(matching_words, TokenizerBuilder::new().build())
|
crate::TermsMatchingStrategy::default(),
|
||||||
|
false,
|
||||||
|
&None,
|
||||||
|
&None,
|
||||||
|
crate::search::new::GeoSortStrategy::default(),
|
||||||
|
0,
|
||||||
|
100,
|
||||||
|
Some(10),
|
||||||
|
&mut crate::DefaultSearchLogger,
|
||||||
|
&mut crate::DefaultSearchLogger,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// consume context and located_query_terms to build MatchingWords.
|
||||||
|
let matching_words = match located_query_terms {
|
||||||
|
Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms),
|
||||||
|
None => MatchingWords::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
MatcherBuilder::new(matching_words, TokenizerBuilder::new().build())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -517,8 +536,7 @@ mod tests {
|
|||||||
fn format_identity() {
|
fn format_identity() {
|
||||||
let temp_index = temp_index_with_documents();
|
let temp_index = temp_index_with_documents();
|
||||||
let rtxn = temp_index.read_txn().unwrap();
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
let ctx = SearchContext::new(&temp_index, &rtxn);
|
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
|
||||||
let builder = MatcherBuilder::new_test(ctx, "split the world");
|
|
||||||
|
|
||||||
let format_options = FormatOptions { highlight: false, crop: None };
|
let format_options = FormatOptions { highlight: false, crop: None };
|
||||||
|
|
||||||
@ -545,8 +563,7 @@ mod tests {
|
|||||||
fn format_highlight() {
|
fn format_highlight() {
|
||||||
let temp_index = temp_index_with_documents();
|
let temp_index = temp_index_with_documents();
|
||||||
let rtxn = temp_index.read_txn().unwrap();
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
let ctx = SearchContext::new(&temp_index, &rtxn);
|
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
|
||||||
let builder = MatcherBuilder::new_test(ctx, "split the world");
|
|
||||||
|
|
||||||
let format_options = FormatOptions { highlight: true, crop: None };
|
let format_options = FormatOptions { highlight: true, crop: None };
|
||||||
|
|
||||||
@ -589,8 +606,7 @@ mod tests {
|
|||||||
fn highlight_unicode() {
|
fn highlight_unicode() {
|
||||||
let temp_index = temp_index_with_documents();
|
let temp_index = temp_index_with_documents();
|
||||||
let rtxn = temp_index.read_txn().unwrap();
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
let ctx = SearchContext::new(&temp_index, &rtxn);
|
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "world");
|
||||||
let builder = MatcherBuilder::new_test(ctx, "world");
|
|
||||||
let format_options = FormatOptions { highlight: true, crop: None };
|
let format_options = FormatOptions { highlight: true, crop: None };
|
||||||
|
|
||||||
// Text containing prefix match.
|
// Text containing prefix match.
|
||||||
@ -599,7 +615,7 @@ mod tests {
|
|||||||
// no crop should return complete text with highlighted matches.
|
// no crop should return complete text with highlighted matches.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@"<em>Ŵôřlḑôle</em>"
|
@"<em>Ŵôřlḑ</em>ôle"
|
||||||
);
|
);
|
||||||
|
|
||||||
// Text containing unicode match.
|
// Text containing unicode match.
|
||||||
@ -611,8 +627,7 @@ mod tests {
|
|||||||
@"<em>Ŵôřlḑ</em>"
|
@"<em>Ŵôřlḑ</em>"
|
||||||
);
|
);
|
||||||
|
|
||||||
let ctx = SearchContext::new(&temp_index, &rtxn);
|
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "westfali");
|
||||||
let builder = MatcherBuilder::new_test(ctx, "westfali");
|
|
||||||
let format_options = FormatOptions { highlight: true, crop: None };
|
let format_options = FormatOptions { highlight: true, crop: None };
|
||||||
|
|
||||||
// Text containing unicode match.
|
// Text containing unicode match.
|
||||||
@ -621,7 +636,7 @@ mod tests {
|
|||||||
// no crop should return complete text with highlighted matches.
|
// no crop should return complete text with highlighted matches.
|
||||||
insta::assert_snapshot!(
|
insta::assert_snapshot!(
|
||||||
matcher.format(format_options),
|
matcher.format(format_options),
|
||||||
@"<em>Westfália</em>"
|
@"<em>Westfáli</em>a"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -629,8 +644,7 @@ mod tests {
|
|||||||
fn format_crop() {
|
fn format_crop() {
|
||||||
let temp_index = temp_index_with_documents();
|
let temp_index = temp_index_with_documents();
|
||||||
let rtxn = temp_index.read_txn().unwrap();
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
let ctx = SearchContext::new(&temp_index, &rtxn);
|
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
|
||||||
let builder = MatcherBuilder::new_test(ctx, "split the world");
|
|
||||||
|
|
||||||
let format_options = FormatOptions { highlight: false, crop: Some(10) };
|
let format_options = FormatOptions { highlight: false, crop: Some(10) };
|
||||||
|
|
||||||
@ -727,8 +741,7 @@ mod tests {
|
|||||||
fn format_highlight_crop() {
|
fn format_highlight_crop() {
|
||||||
let temp_index = temp_index_with_documents();
|
let temp_index = temp_index_with_documents();
|
||||||
let rtxn = temp_index.read_txn().unwrap();
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
let ctx = SearchContext::new(&temp_index, &rtxn);
|
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
|
||||||
let builder = MatcherBuilder::new_test(ctx, "split the world");
|
|
||||||
|
|
||||||
let format_options = FormatOptions { highlight: true, crop: Some(10) };
|
let format_options = FormatOptions { highlight: true, crop: Some(10) };
|
||||||
|
|
||||||
@ -790,8 +803,7 @@ mod tests {
|
|||||||
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
|
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
|
||||||
let temp_index = temp_index_with_documents();
|
let temp_index = temp_index_with_documents();
|
||||||
let rtxn = temp_index.read_txn().unwrap();
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
let ctx = SearchContext::new(&temp_index, &rtxn);
|
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
|
||||||
let builder = MatcherBuilder::new_test(ctx, "split the world");
|
|
||||||
|
|
||||||
let text = "void void split the world void void.";
|
let text = "void void split the world void void.";
|
||||||
|
|
||||||
@ -827,8 +839,8 @@ mod tests {
|
|||||||
fn partial_matches() {
|
fn partial_matches() {
|
||||||
let temp_index = temp_index_with_documents();
|
let temp_index = temp_index_with_documents();
|
||||||
let rtxn = temp_index.read_txn().unwrap();
|
let rtxn = temp_index.read_txn().unwrap();
|
||||||
let ctx = SearchContext::new(&temp_index, &rtxn);
|
let mut builder =
|
||||||
let mut builder = MatcherBuilder::new_test(ctx, "the \"t he\" door \"do or\"");
|
MatcherBuilder::new_test(&rtxn, &temp_index, "the \"t he\" door \"do or\"");
|
||||||
builder.highlight_prefix("_".to_string());
|
builder.highlight_prefix("_".to_string());
|
||||||
builder.highlight_suffix("_".to_string());
|
builder.highlight_suffix("_".to_string());
|
||||||
|
|
||||||
|
@ -470,6 +470,9 @@ impl QueryTerm {
|
|||||||
pub fn is_cached_prefix(&self) -> bool {
|
pub fn is_cached_prefix(&self) -> bool {
|
||||||
self.zero_typo.use_prefix_db.is_some()
|
self.zero_typo.use_prefix_db.is_some()
|
||||||
}
|
}
|
||||||
|
pub fn is_prefix(&self) -> bool {
|
||||||
|
self.is_prefix
|
||||||
|
}
|
||||||
pub fn original_word(&self, ctx: &SearchContext) -> String {
|
pub fn original_word(&self, ctx: &SearchContext) -> String {
|
||||||
ctx.word_interner.get(self.original).clone()
|
ctx.word_interner.get(self.original).clone()
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user