mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Added more tests, fixed issue
This commit is contained in:
parent
561b4836d8
commit
409bc6b424
7 changed files with 368 additions and 423 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -33,8 +33,8 @@ fn get_adjusted_indices_for_too_few_words(
|
|||
let mut is_index_backwards_at_hard_separator = false;
|
||||
let mut is_index_forwards_at_hard_separator = false;
|
||||
|
||||
// false + ends reached because TODO
|
||||
let mut is_crop_size_or_both_ends_reached = is_end_reached && is_beginning_reached;
|
||||
let mut is_crop_size_or_both_ends_reached =
|
||||
words_count == crop_size || (is_end_reached && is_beginning_reached);
|
||||
|
||||
let mut dir = Direction::Forwards;
|
||||
|
||||
|
@ -108,18 +108,38 @@ fn get_adjusted_indices_for_too_few_words(
|
|||
// 2. if forwards index reached a hard separator and backwards is currently hard, we can go backwards
|
||||
}
|
||||
|
||||
// keep advancing forward to check if there are only separator tokens left until the end
|
||||
// if so, then include those too in the index range
|
||||
let mut try_index_forward = valid_index_forward + 1;
|
||||
while let Some(token) = tokens.get(try_index_forward) {
|
||||
if !token.is_separator() {
|
||||
return [valid_index_backward, valid_index_forward];
|
||||
// keep advancing forward and backward to check if there are only separator tokens
|
||||
// left until either end; if so, include those too in the index range
|
||||
|
||||
let saved_index = valid_index_forward;
|
||||
loop {
|
||||
if valid_index_forward == tokens.len() - 1 {
|
||||
break;
|
||||
}
|
||||
|
||||
try_index_forward += 1;
|
||||
valid_index_forward += 1;
|
||||
|
||||
if !tokens[valid_index_forward].is_separator() {
|
||||
valid_index_forward = saved_index;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
[valid_index_backward, try_index_forward - 1]
|
||||
let saved_index = valid_index_backward;
|
||||
loop {
|
||||
if valid_index_backward == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
valid_index_backward -= 1;
|
||||
|
||||
if !tokens[valid_index_backward].is_separator() {
|
||||
valid_index_backward = saved_index;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
[valid_index_backward, valid_index_forward]
|
||||
}
|
||||
|
||||
fn get_adjusted_index_forward_for_too_many_words(
|
||||
|
@ -158,14 +178,13 @@ pub fn get_adjusted_indices_for_highlights_and_crop_size(
|
|||
crop_size: usize,
|
||||
) -> [usize; 2] {
|
||||
match words_count.cmp(&crop_size) {
|
||||
Ordering::Less => get_adjusted_indices_for_too_few_words(
|
||||
Ordering::Equal | Ordering::Less => get_adjusted_indices_for_too_few_words(
|
||||
tokens,
|
||||
index_backward,
|
||||
index_forward,
|
||||
words_count,
|
||||
crop_size,
|
||||
),
|
||||
Ordering::Equal => [index_backward, index_forward],
|
||||
Ordering::Greater => [
|
||||
index_backward,
|
||||
get_adjusted_index_forward_for_too_many_words(
|
||||
|
|
|
@ -247,12 +247,22 @@ impl MatchingWords {
|
|||
// TODO: There is potentially an optimization to be made here
|
||||
// if we matched a term then we can skip checking it for further iterations?
|
||||
|
||||
println!(
|
||||
"{:?}",
|
||||
self.located_matching_words
|
||||
.iter()
|
||||
.flat_map(|lw| lw.value.iter().map(move |w| (
|
||||
lw.is_prefix,
|
||||
lw.original_char_count,
|
||||
self.word_interner.get(*w)
|
||||
)))
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
|
||||
self.located_matching_words
|
||||
.iter()
|
||||
.flat_map(|lw| lw.value.iter().map(move |w| (lw, w)))
|
||||
.flat_map(|lw| lw.value.iter().map(move |w| (lw, self.word_interner.get(*w))))
|
||||
.find_map(|(located_words, word)| {
|
||||
let word = self.word_interner.get(*word);
|
||||
|
||||
let [char_count, byte_len] =
|
||||
match PrefixedOrEquality::new(tph.token.lemma(), word, located_words.is_prefix)
|
||||
{
|
||||
|
@ -368,93 +378,105 @@ impl Debug for MatchingWords {
|
|||
}
|
||||
}
|
||||
|
||||
// #[cfg(test)]
|
||||
// pub(crate) mod tests {
|
||||
// use super::super::super::located_query_terms_from_tokens;
|
||||
// use super::*;
|
||||
// use crate::search::new::matches::tests::temp_index_with_documents;
|
||||
// use crate::search::new::query_term::ExtractedTokens;
|
||||
// use charabia::{TokenKind, TokenizerBuilder};
|
||||
// use std::borrow::Cow;
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::super::super::located_query_terms_from_tokens;
|
||||
use super::*;
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::search::new::query_term::ExtractedTokens;
|
||||
use charabia::{TokenKind, TokenizerBuilder};
|
||||
use std::borrow::Cow;
|
||||
|
||||
// #[test]
|
||||
// fn matching_words() {
|
||||
// let temp_index = temp_index_with_documents(None);
|
||||
// let rtxn = temp_index.read_txn().unwrap();
|
||||
// let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap();
|
||||
// let mut builder = TokenizerBuilder::default();
|
||||
// let tokenizer = builder.build();
|
||||
// let text = "split this world";
|
||||
// let tokens = tokenizer.tokenize(text);
|
||||
// let ExtractedTokens { query_terms, .. } =
|
||||
// located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap();
|
||||
// let matching_words = MatchingWords::new(ctx, &query_terms);
|
||||
fn temp_index_with_documents() -> TempIndex {
|
||||
let temp_index = TempIndex::new();
|
||||
temp_index
|
||||
.add_documents(documents!([
|
||||
{ "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
|
||||
{ "id": 2, "name": "Westfália" },
|
||||
{ "id": 3, "name": "Ŵôřlḑôle" },
|
||||
]))
|
||||
.unwrap();
|
||||
temp_index
|
||||
}
|
||||
|
||||
// assert_eq!(
|
||||
// matching_words.get_matches_and_query_positions(
|
||||
// &[
|
||||
// Token {
|
||||
// kind: TokenKind::Word,
|
||||
// lemma: Cow::Borrowed("split"),
|
||||
// char_end: "split".chars().count(),
|
||||
// byte_end: "split".len(),
|
||||
// ..Default::default()
|
||||
// },
|
||||
// Token {
|
||||
// kind: TokenKind::Word,
|
||||
// lemma: Cow::Borrowed("nyc"),
|
||||
// char_end: "nyc".chars().count(),
|
||||
// byte_end: "nyc".len(),
|
||||
// ..Default::default()
|
||||
// },
|
||||
// Token {
|
||||
// kind: TokenKind::Word,
|
||||
// lemma: Cow::Borrowed("world"),
|
||||
// char_end: "world".chars().count(),
|
||||
// byte_end: "world".len(),
|
||||
// ..Default::default()
|
||||
// },
|
||||
// Token {
|
||||
// kind: TokenKind::Word,
|
||||
// lemma: Cow::Borrowed("worlded"),
|
||||
// char_end: "worlded".chars().count(),
|
||||
// byte_end: "worlded".len(),
|
||||
// ..Default::default()
|
||||
// },
|
||||
// Token {
|
||||
// kind: TokenKind::Word,
|
||||
// lemma: Cow::Borrowed("thisnew"),
|
||||
// char_end: "thisnew".chars().count(),
|
||||
// byte_end: "thisnew".len(),
|
||||
// ..Default::default()
|
||||
// }
|
||||
// ],
|
||||
// text
|
||||
// ),
|
||||
// (
|
||||
// vec![
|
||||
// Match {
|
||||
// char_count: 5,
|
||||
// byte_len: 5,
|
||||
// position: MatchPosition::Word { word_position: 0, token_position: 0 }
|
||||
// },
|
||||
// Match {
|
||||
// char_count: 5,
|
||||
// byte_len: 5,
|
||||
// position: MatchPosition::Word { word_position: 2, token_position: 2 }
|
||||
// },
|
||||
// Match {
|
||||
// char_count: 5,
|
||||
// byte_len: 5,
|
||||
// position: MatchPosition::Word { word_position: 3, token_position: 3 }
|
||||
// }
|
||||
// ],
|
||||
// vec![
|
||||
// QueryPosition { range: [0, 0], index: 0 },
|
||||
// QueryPosition { range: [2, 2], index: 1 },
|
||||
// QueryPosition { range: [2, 2], index: 2 }
|
||||
// ]
|
||||
// )
|
||||
// );
|
||||
// }
|
||||
// }
|
||||
#[test]
|
||||
fn matching_words() {
|
||||
let temp_index = temp_index_with_documents();
|
||||
let rtxn = temp_index.read_txn().unwrap();
|
||||
let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap();
|
||||
let mut builder = TokenizerBuilder::default();
|
||||
let tokenizer = builder.build();
|
||||
let text = "split this world";
|
||||
let tokens = tokenizer.tokenize(text);
|
||||
let ExtractedTokens { query_terms, .. } =
|
||||
located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap();
|
||||
let matching_words = MatchingWords::new(ctx, &query_terms);
|
||||
|
||||
assert_eq!(
|
||||
matching_words.get_matches_and_query_positions(
|
||||
&[
|
||||
Token {
|
||||
kind: TokenKind::Word,
|
||||
lemma: Cow::Borrowed("split"),
|
||||
char_end: "split".chars().count(),
|
||||
byte_end: "split".len(),
|
||||
..Default::default()
|
||||
},
|
||||
Token {
|
||||
kind: TokenKind::Word,
|
||||
lemma: Cow::Borrowed("nyc"),
|
||||
char_end: "nyc".chars().count(),
|
||||
byte_end: "nyc".len(),
|
||||
..Default::default()
|
||||
},
|
||||
Token {
|
||||
kind: TokenKind::Word,
|
||||
lemma: Cow::Borrowed("world"),
|
||||
char_end: "world".chars().count(),
|
||||
byte_end: "world".len(),
|
||||
..Default::default()
|
||||
},
|
||||
Token {
|
||||
kind: TokenKind::Word,
|
||||
lemma: Cow::Borrowed("worlded"),
|
||||
char_end: "worlded".chars().count(),
|
||||
byte_end: "worlded".len(),
|
||||
..Default::default()
|
||||
},
|
||||
Token {
|
||||
kind: TokenKind::Word,
|
||||
lemma: Cow::Borrowed("thisnew"),
|
||||
char_end: "thisnew".chars().count(),
|
||||
byte_end: "thisnew".len(),
|
||||
..Default::default()
|
||||
}
|
||||
],
|
||||
text
|
||||
),
|
||||
(
|
||||
vec![
|
||||
Match {
|
||||
char_count: 5,
|
||||
byte_len: 5,
|
||||
position: MatchPosition::Word { word_position: 0, token_position: 0 }
|
||||
},
|
||||
Match {
|
||||
char_count: 5,
|
||||
byte_len: 5,
|
||||
position: MatchPosition::Word { word_position: 2, token_position: 2 }
|
||||
},
|
||||
Match {
|
||||
char_count: 5,
|
||||
byte_len: 5,
|
||||
position: MatchPosition::Word { word_position: 3, token_position: 3 }
|
||||
}
|
||||
],
|
||||
vec![
|
||||
QueryPosition { range: [0, 0], index: 0 },
|
||||
QueryPosition { range: [2, 2], index: 1 },
|
||||
QueryPosition { range: [2, 2], index: 2 }
|
||||
]
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -200,7 +200,7 @@ mod tests {
|
|||
format_options: Option<FormatOptions>,
|
||||
text: &str,
|
||||
query: &str,
|
||||
expected_text: &str,
|
||||
expected_maybe_text: Option<&str>,
|
||||
) {
|
||||
let temp_index = TempIndex::new();
|
||||
|
||||
|
@ -216,7 +216,28 @@ mod tests {
|
|||
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query);
|
||||
let mut matcher = builder.build(text, None);
|
||||
|
||||
assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
|
||||
assert_eq!(
|
||||
matcher.get_formatted_text(format_options),
|
||||
expected_maybe_text.map(|v| v.to_string())
|
||||
);
|
||||
}
|
||||
|
||||
struct FormatVariations<'a> {
|
||||
highlight_with_crop: Option<&'a str>,
|
||||
highlight: Option<&'a str>,
|
||||
crop: Option<&'a str>,
|
||||
}
|
||||
|
||||
impl<'a> FormatVariations<'a> {
|
||||
fn get(&self) -> [(Option<FormatOptions>, Option<&'a str>); 5] {
|
||||
[
|
||||
(None, None),
|
||||
(Some(FormatOptions { highlight: true, crop: Some(2) }), self.highlight_with_crop),
|
||||
(Some(FormatOptions { highlight: true, crop: None }), self.highlight),
|
||||
(Some(FormatOptions { highlight: false, crop: Some(2) }), self.crop),
|
||||
(Some(FormatOptions { highlight: false, crop: None }), None),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
/// "Dei store fiskane eta dei små — dei liger under som minst förmå."
|
||||
|
@ -225,77 +246,66 @@ mod tests {
|
|||
fn rename_me_with_base_text(
|
||||
format_options: Option<FormatOptions>,
|
||||
query: &str,
|
||||
expected_text: &str,
|
||||
expected_maybe_text: Option<&str>,
|
||||
) {
|
||||
rename_me(
|
||||
format_options,
|
||||
"Dei store fiskane eta dei små — dei liger under som minst förmå.",
|
||||
query,
|
||||
expected_text,
|
||||
expected_maybe_text,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn phrase_highlight_bigger_than_crop() {
|
||||
rename_me_with_base_text(
|
||||
Some(FormatOptions { highlight: true, crop: Some(1) }),
|
||||
"\"dei liger\"",
|
||||
"…<em>dei</em>…",
|
||||
);
|
||||
fn empty_query() {
|
||||
for (format_options, expected_maybe_text) in (FormatVariations {
|
||||
highlight_with_crop: Some("Dei store…"),
|
||||
highlight: None,
|
||||
crop: Some("Dei store…"),
|
||||
}
|
||||
.get())
|
||||
{
|
||||
rename_me_with_base_text(format_options, "", expected_maybe_text);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn phrase_highlight_same_size_as_crop() {
|
||||
rename_me_with_base_text(
|
||||
Some(FormatOptions { highlight: true, crop: Some(2) }),
|
||||
"\"dei liger\"",
|
||||
"…<em>dei liger</em>…",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn phrase_highlight_crop_middle() {
|
||||
rename_me_with_base_text(
|
||||
Some(FormatOptions { highlight: true, crop: Some(4) }),
|
||||
"\"dei liger\"",
|
||||
"…små — <em>dei liger</em> under…",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn phrase_highlight_crop_end() {
|
||||
rename_me_with_base_text(
|
||||
Some(FormatOptions { highlight: true, crop: Some(4) }),
|
||||
"\"minst förmå\"",
|
||||
"…under som <em>minst förmå</em>.",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn phrase_highlight_crop_beginning() {
|
||||
rename_me_with_base_text(
|
||||
Some(FormatOptions { highlight: true, crop: Some(4) }),
|
||||
"\"Dei store\"",
|
||||
"<em>Dei store</em> fiskane eta…",
|
||||
);
|
||||
fn only_separators() {
|
||||
for (format_options, expected_maybe_text) in (FormatVariations {
|
||||
highlight_with_crop: Some(":-…"),
|
||||
highlight: None,
|
||||
crop: Some(":-…"),
|
||||
}
|
||||
.get())
|
||||
{
|
||||
rename_me(format_options, ":-)", ":-)", expected_maybe_text);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn highlight_end() {
|
||||
rename_me_with_base_text(
|
||||
Some(FormatOptions { highlight: true, crop: None }),
|
||||
"minst förmå",
|
||||
"Dei store fiskane eta dei små — dei liger under som <em>minst</em> <em>förmå</em>.",
|
||||
);
|
||||
// TODO: Why is "förmå" marked as prefix in located matching words?
|
||||
for (format_options, expected_maybe_text) in (FormatVariations {
|
||||
highlight_with_crop: Some("…<em>minst</em> <em>förmå</em>."),
|
||||
highlight: Some("Dei store fiskane eta dei små — dei liger under som <em>minst</em> <em>förmå</em>."),
|
||||
crop: Some("…minst förmå."),
|
||||
}
|
||||
.get()) {
|
||||
rename_me_with_base_text(format_options, "minst förmå", expected_maybe_text);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn highlight_beginning_and_middle() {
|
||||
rename_me_with_base_text(
|
||||
Some(FormatOptions { highlight: true, crop: None }),
|
||||
"Dei store",
|
||||
"<em>Dei</em> <em>store</em> fiskane eta <em>dei</em> små — <em>dei</em> liger under som minst förmå.",
|
||||
);
|
||||
// TODO: Why is "store" marked as prefix in located matching words?
|
||||
for (format_options, expected_maybe_text) in (FormatVariations {
|
||||
highlight_with_crop: Some("<em>Dei</em> <em>store</em>…"),
|
||||
highlight: Some("<em>Dei</em> <em>store</em> fiskane eta <em>dei</em> små — <em>dei</em> liger under som minst förmå."),
|
||||
crop: Some("Dei store…"),
|
||||
}
|
||||
.get()) {
|
||||
rename_me_with_base_text(format_options, "Dei store", expected_maybe_text);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -306,291 +316,185 @@ mod tests {
|
|||
// `milli::search::new::query_term::QueryTerm::all_computed_derivations` might be at fault here
|
||||
|
||||
// interned words = ["forma"]
|
||||
rename_me(
|
||||
Some(FormatOptions { highlight: true, crop: None }),
|
||||
"altså, förmå, på en måte",
|
||||
"fo",
|
||||
"altså, <em>förmå</em>, på en måte",
|
||||
);
|
||||
for (format_options, expected_maybe_text) in (FormatVariations {
|
||||
highlight_with_crop: Some("…<em>förmå</em>, på…"),
|
||||
highlight: Some("altså, <em>förmå</em>, på en måte"),
|
||||
crop: Some("…förmå, på…"),
|
||||
}
|
||||
.get())
|
||||
{
|
||||
rename_me(format_options, "altså, förmå, på en måte", "fo", expected_maybe_text);
|
||||
}
|
||||
|
||||
// interned words = ["fo", "forma"]
|
||||
rename_me(
|
||||
Some(FormatOptions { highlight: true, crop: None }),
|
||||
"altså, fo förmå, på en måte",
|
||||
"fo",
|
||||
"altså, <em>fo</em> <em>fö</em>rmå, på en måte",
|
||||
);
|
||||
for (format_options, expected_maybe_text) in (FormatVariations {
|
||||
highlight_with_crop: Some("…<em>fo</em> <em>fö</em>rmå…"),
|
||||
highlight: Some("altså, <em>fo</em> <em>fö</em>rmå, på en måte"),
|
||||
crop: Some("…fo förmå…"),
|
||||
}
|
||||
.get())
|
||||
{
|
||||
rename_me(format_options, "altså, fo förmå, på en måte", "fo", expected_maybe_text);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn partial_match_end() {
|
||||
rename_me(
|
||||
Some(FormatOptions { highlight: true, crop: None }),
|
||||
"förmå, på en måte",
|
||||
"fo",
|
||||
"<em>förmå</em>, på en måte",
|
||||
);
|
||||
for (format_options, expected_maybe_text) in (FormatVariations {
|
||||
highlight_with_crop: Some("<em>förmå</em>, på…"),
|
||||
highlight: Some("<em>förmå</em>, på en måte"),
|
||||
crop: Some("förmå, på…"),
|
||||
}
|
||||
.get())
|
||||
{
|
||||
rename_me(format_options, "förmå, på en måte", "fo", expected_maybe_text);
|
||||
}
|
||||
|
||||
rename_me(
|
||||
Some(FormatOptions { highlight: true, crop: None }),
|
||||
"fo förmå, på en måte",
|
||||
"fo",
|
||||
"<em>fo</em> <em>fö</em>rmå, på en måte",
|
||||
);
|
||||
for (format_options, expected_maybe_text) in (FormatVariations {
|
||||
highlight_with_crop: Some("<em>fo</em> <em>fö</em>rmå…"),
|
||||
highlight: Some("<em>fo</em> <em>fö</em>rmå, på en måte"),
|
||||
crop: Some("fo förmå…"),
|
||||
}
|
||||
.get())
|
||||
{
|
||||
rename_me(format_options, "fo förmå, på en måte", "fo", expected_maybe_text);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn partial_match_beginning() {
|
||||
rename_me(
|
||||
Some(FormatOptions { highlight: true, crop: None }),
|
||||
"altså, förmå",
|
||||
"fo",
|
||||
"altså, <em>förmå</em>",
|
||||
);
|
||||
for (format_options, expected_maybe_text) in (FormatVariations {
|
||||
highlight_with_crop: Some("altså, <em>förmå</em>"),
|
||||
highlight: Some("altså, <em>förmå</em>"),
|
||||
crop: Some("altså, förmå"),
|
||||
}
|
||||
.get())
|
||||
{
|
||||
rename_me(format_options, "altså, förmå", "fo", expected_maybe_text);
|
||||
}
|
||||
|
||||
rename_me(
|
||||
Some(FormatOptions { highlight: true, crop: None }),
|
||||
"altså, fo förmå",
|
||||
"fo",
|
||||
"altså, <em>fo</em> <em>fö</em>rmå",
|
||||
for (format_options, expected_maybe_text) in (FormatVariations {
|
||||
highlight_with_crop: Some("…<em>fo</em> <em>fö</em>rmå"),
|
||||
highlight: Some("altså, <em>fo</em> <em>fö</em>rmå"),
|
||||
crop: Some("…fo förmå"),
|
||||
}
|
||||
.get())
|
||||
{
|
||||
rename_me(format_options, "altså, fo förmå", "fo", expected_maybe_text);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn separator_at_end() {
|
||||
for (format_options, expected_maybe_text) in (FormatVariations {
|
||||
highlight_with_crop: Some("…<em>minst</em> förmå. , ;"),
|
||||
highlight: Some("; , — dei liger under som <em>minst</em> förmå. , ;"),
|
||||
crop: Some("…minst förmå. , ;"),
|
||||
}
|
||||
.get())
|
||||
{
|
||||
rename_me(
|
||||
format_options,
|
||||
"; , — dei liger under som minst förmå. , ;",
|
||||
"minst",
|
||||
expected_maybe_text,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn separator_at_beginning() {
|
||||
for (format_options, expected_maybe_text) in (FormatVariations {
|
||||
highlight_with_crop: Some("; , — <em>dei</em> liger…"),
|
||||
highlight: Some("; , — <em>dei</em> liger under som minst förmå. , ;"),
|
||||
crop: Some("; , — dei liger…"),
|
||||
}
|
||||
.get())
|
||||
{
|
||||
rename_me(
|
||||
format_options,
|
||||
"; , — dei liger under som minst förmå. , ;",
|
||||
"dei",
|
||||
expected_maybe_text,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn phrase() {
|
||||
for (format_options, expected_maybe_text) in (FormatVariations {
|
||||
highlight_with_crop: Some("…<em>dei liger</em>…"),
|
||||
highlight: Some(
|
||||
"Dei store fiskane eta dei små — <em>dei liger</em> under som minst förmå.",
|
||||
),
|
||||
crop: Some("…dei liger…"),
|
||||
}
|
||||
.get())
|
||||
{
|
||||
rename_me_with_base_text(format_options, "\"dei liger\"", expected_maybe_text);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn phrase_highlight_bigger_than_crop() {
|
||||
rename_me_with_base_text(
|
||||
Some(FormatOptions { highlight: true, crop: Some(1) }),
|
||||
"\"dei liger\"",
|
||||
Some("…<em>dei</em>…"),
|
||||
);
|
||||
}
|
||||
|
||||
// #[test]
|
||||
// fn format_identity() {
|
||||
// let temp_index = temp_index_with_documents(None);
|
||||
// let rtxn = temp_index.read_txn().unwrap();
|
||||
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
|
||||
// let format_options = Some(FormatOptions { highlight: false, crop: None });
|
||||
#[test]
|
||||
fn phrase_bigger_than_crop() {
|
||||
rename_me_with_base_text(
|
||||
Some(FormatOptions { highlight: false, crop: Some(1) }),
|
||||
"\"dei liger\"",
|
||||
Some("…dei…"),
|
||||
);
|
||||
}
|
||||
|
||||
// let test_values = [
|
||||
// // Text without any match.
|
||||
// "A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
|
||||
// // Text containing all matches.
|
||||
// "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
|
||||
// // Text containing some matches.
|
||||
// "Natalie risk her future to build a world with the boy she loves."
|
||||
// ];
|
||||
#[test]
|
||||
fn phrase_highlight_crop_middle() {
|
||||
rename_me_with_base_text(
|
||||
Some(FormatOptions { highlight: true, crop: Some(4) }),
|
||||
"\"dei liger\"",
|
||||
Some("…små — <em>dei liger</em> under…"),
|
||||
);
|
||||
}
|
||||
|
||||
// for text in test_values {
|
||||
// let mut matcher = builder.build(text, None);
|
||||
// // no crop and no highlight should return complete text.
|
||||
// assert_eq!(matcher.get_formatted_text(format_options), None);
|
||||
// }
|
||||
// }
|
||||
#[test]
|
||||
fn phrase_crop_middle() {
|
||||
rename_me_with_base_text(
|
||||
Some(FormatOptions { highlight: false, crop: Some(4) }),
|
||||
"\"dei liger\"",
|
||||
Some("…små — dei liger under…"),
|
||||
);
|
||||
}
|
||||
|
||||
// #[test]
|
||||
// fn format_highlight() {
|
||||
// let temp_index = temp_index_with_documents(None);
|
||||
// let rtxn = temp_index.read_txn().unwrap();
|
||||
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
|
||||
// let format_options = Some(FormatOptions { highlight: true, crop: None });
|
||||
#[test]
|
||||
fn phrase_highlight_crop_end() {
|
||||
rename_me_with_base_text(
|
||||
Some(FormatOptions { highlight: true, crop: Some(4) }),
|
||||
"\"minst förmå\"",
|
||||
Some("…under som <em>minst förmå</em>."),
|
||||
);
|
||||
}
|
||||
|
||||
// let test_values = [
|
||||
// // empty text.
|
||||
// ["", ""],
|
||||
// // text containing only separators.
|
||||
// [":-)", ":-)"],
|
||||
// // Text without any match.
|
||||
// ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
|
||||
// "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"],
|
||||
// // Text containing all matches.
|
||||
// ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
|
||||
// "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."],
|
||||
// // Text containing some matches.
|
||||
// ["Natalie risk her future to build a world with the boy she loves.",
|
||||
// "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves."],
|
||||
// ];
|
||||
#[test]
|
||||
fn phrase_crop_end() {
|
||||
rename_me_with_base_text(
|
||||
Some(FormatOptions { highlight: false, crop: Some(4) }),
|
||||
"\"minst förmå\"",
|
||||
Some("…under som minst förmå."),
|
||||
);
|
||||
}
|
||||
|
||||
// for [text, expected_text] in test_values {
|
||||
// let mut matcher = builder.build(text, None);
|
||||
// // no crop should return complete text with highlighted matches.
|
||||
// assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
|
||||
// }
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn highlight_unicode() {
|
||||
// let temp_index = temp_index_with_documents(None);
|
||||
// let rtxn = temp_index.read_txn().unwrap();
|
||||
// let format_options = Some(FormatOptions { highlight: true, crop: None });
|
||||
|
||||
// let test_values = [
|
||||
// // Text containing prefix match.
|
||||
// ["world", "Ŵôřlḑôle", "<em>Ŵôřlḑ</em>ôle"],
|
||||
// // Text containing unicode match.
|
||||
// ["world", "Ŵôřlḑ", "<em>Ŵôřlḑ</em>"],
|
||||
// // Text containing unicode match.
|
||||
// ["westfali", "Westfália", "<em>Westfáli</em>a"],
|
||||
// ];
|
||||
|
||||
// for [query, text, expected_text] in test_values {
|
||||
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query);
|
||||
// let mut matcher = builder.build(text, None);
|
||||
// // no crop should return complete text with highlighted matches.
|
||||
// assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
|
||||
// }
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn format_crop() {
|
||||
// let temp_index = temp_index_with_documents(None);
|
||||
// let rtxn = temp_index.read_txn().unwrap();
|
||||
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
|
||||
// let format_options = Some(FormatOptions { highlight: false, crop: Some(10) });
|
||||
|
||||
// let test_values = [
|
||||
// // empty text.
|
||||
// // ["", ""],
|
||||
// // text containing only separators.
|
||||
// // [":-)", ":-)"],
|
||||
// // Text without any match.
|
||||
// ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
|
||||
// "A quick brown fox can not jump 32 feet, right…"],
|
||||
// // Text without any match starting by a separator.
|
||||
// ["(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)",
|
||||
// "(A quick brown fox can not jump 32 feet, right…" ],
|
||||
// // Test phrase propagation
|
||||
// ["Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.",
|
||||
// "…Split The World is a book written by Emily Henry…"],
|
||||
// // Text containing some matches.
|
||||
// ["Natalie risk her future to build a world with the boy she loves.",
|
||||
// "…future to build a world with the boy she loves."],
|
||||
// // Text containing all matches.
|
||||
// ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
|
||||
// "…she loves. Emily Henry: The Love That Split The World."],
|
||||
// // Text containing a match unordered and a match ordered.
|
||||
// ["The world split void void void void void void void void void split the world void void",
|
||||
// "…void void void void void split the world void void"],
|
||||
// // Text containing matches with different density.
|
||||
// ["split void the void void world void void void void void void void void void void split the world void void",
|
||||
// "…void void void void void split the world void void"],
|
||||
// ["split split split split split split void void void void void void void void void void split the world void void",
|
||||
// "…void void void void void split the world void void"]
|
||||
// ];
|
||||
|
||||
// for [text, expected_text] in test_values {
|
||||
// let mut matcher = builder.build(text, None);
|
||||
// // crop without highlight should return the cropped text without highlighted matches.
|
||||
// assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
|
||||
// }
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn format_highlight_crop() {
|
||||
// let temp_index = temp_index_with_documents(None);
|
||||
// let rtxn = temp_index.read_txn().unwrap();
|
||||
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
|
||||
// let format_options = Some(FormatOptions { highlight: true, crop: Some(10) });
|
||||
|
||||
// let test_values = [
|
||||
// // empty text.
|
||||
// ["", ""],
|
||||
// // text containing only separators.
|
||||
// [":-)", ":-)"],
|
||||
// // Text without any match.
|
||||
// ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
|
||||
// "A quick brown fox can not jump 32 feet, right…"],
|
||||
// // Text containing some matches.
|
||||
// ["Natalie risk her future to build a world with the boy she loves.",
|
||||
// "…future to build a <em>world</em> with <em>the</em> boy she loves."],
|
||||
// // Text containing all matches.
|
||||
// ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
|
||||
// "…she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."],
|
||||
// // Text containing a match unordered and a match ordered.
|
||||
// ["The world split void void void void void void void void void split the world void void",
|
||||
// "…void void void void void <em>split</em> <em>the</em> <em>world</em> void void"]
|
||||
// ];
|
||||
|
||||
// for [text, expected_text] in test_values {
|
||||
// let mut matcher = builder.build(text, None);
|
||||
// // crop with highlight should return the cropped text with highlighted matches.
|
||||
// assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
|
||||
// }
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn format_highlight_crop_phrase_query() {
|
||||
// //! testing: https://github.com/meilisearch/meilisearch/issues/3975
|
||||
// let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
|
||||
// let temp_index = temp_index_with_documents(Some(documents!([
|
||||
// { "id": 1, "text": text }
|
||||
// ])));
|
||||
// let rtxn = temp_index.read_txn().unwrap();
|
||||
|
||||
// let format_options = Some(FormatOptions { highlight: true, crop: Some(10) });
|
||||
|
||||
// let test_values = [
|
||||
// // should return 10 words with a marker at the start as well the end, and the highlighted matches.
|
||||
// ["\"the world\"",
|
||||
// "…the power to split <em>the world</em> between those who embraced…"],
|
||||
// // should highlight "those" and the phrase "and those".
|
||||
// ["those \"and those\"",
|
||||
// "…world between <em>those</em> who embraced progress <em>and those</em> who resisted…"],
|
||||
// ["\"The groundbreaking invention had the power to split the world\"",
|
||||
// "<em>The groundbreaking invention had the power to split the world</em>…"],
|
||||
// ["\"The groundbreaking invention had the power to split the world between those\"",
|
||||
// "<em>The groundbreaking invention had the power to split the world</em>…"],
|
||||
// ["\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"",
|
||||
// "…between those who <em>embraced progress and those who resisted change</em>!"],
|
||||
// ["\"groundbreaking invention\" \"split the world between\"",
|
||||
// "…<em>groundbreaking invention</em> had the power to <em>split the world between</em>…"],
|
||||
// ["\"groundbreaking invention\" \"had the power to split the world between those\"",
|
||||
// "…<em>invention</em> <em>had the power to split the world between those</em>…"],
|
||||
// ];
|
||||
|
||||
// for [query, expected_text] in test_values {
|
||||
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query);
|
||||
// let mut matcher = builder.build(text, None);
|
||||
|
||||
// assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
|
||||
// }
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn smaller_crop_size() {
|
||||
// //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
|
||||
// let temp_index = temp_index_with_documents(None);
|
||||
// let rtxn = temp_index.read_txn().unwrap();
|
||||
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
|
||||
// let text = "void void split the world void void.";
|
||||
// let mut matcher = builder.build(text, None);
|
||||
|
||||
// let test_values = [
|
||||
// // set a smaller crop size
|
||||
// // because crop size < query size, partially format matches.
|
||||
// (2, "…split the…"),
|
||||
// // set a smaller crop size
|
||||
// // because crop size < query size, partially format matches.
|
||||
// (1, "…split…"),
|
||||
// // set crop size to 0
|
||||
// // because crop size is 0, crop is ignored.
|
||||
// (0, "void void split the world void void."),
|
||||
// ];
|
||||
|
||||
// for (crop_size, expected_text) in test_values {
|
||||
// // set a smaller crop size
|
||||
// let format_options = Some(FormatOptions { highlight: false, crop: Some(crop_size) });
|
||||
// assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
|
||||
// }
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn partial_matches() {
|
||||
// let temp_index = temp_index_with_documents(None);
|
||||
// let rtxn = temp_index.read_txn().unwrap();
|
||||
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "the \"t he\" door \"do or\"");
|
||||
|
||||
// let format_options = Some(FormatOptions { highlight: true, crop: None });
|
||||
|
||||
// let text = "the do or die can't be he do and or isn't he";
|
||||
// let mut matcher = builder.build(text, None);
|
||||
// assert_eq!(
|
||||
// matcher.get_formatted_text(format_options),
|
||||
// Some(
|
||||
// "<em>the</em> <em>do or</em> die can't be he do and or isn'<em>t he</em>"
|
||||
// .to_string()
|
||||
// )
|
||||
// );
|
||||
// }
|
||||
#[test]
|
||||
fn phrase_highlight_crop_beginning() {
|
||||
rename_me_with_base_text(
|
||||
Some(FormatOptions { highlight: true, crop: Some(4) }),
|
||||
"\"Dei store\"",
|
||||
Some("<em>Dei store</em> fiskane eta…"),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue