Added more tests, fixed issue

This commit is contained in:
F. Levi 2025-06-20 09:46:39 +03:00
parent 561b4836d8
commit 409bc6b424
7 changed files with 368 additions and 423 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -33,8 +33,8 @@ fn get_adjusted_indices_for_too_few_words(
let mut is_index_backwards_at_hard_separator = false;
let mut is_index_forwards_at_hard_separator = false;
// false + ends reached because TODO
let mut is_crop_size_or_both_ends_reached = is_end_reached && is_beginning_reached;
let mut is_crop_size_or_both_ends_reached =
words_count == crop_size || (is_end_reached && is_beginning_reached);
let mut dir = Direction::Forwards;
@ -108,18 +108,38 @@ fn get_adjusted_indices_for_too_few_words(
// 2. if forwards index reached a hard separator and backwards is currently hard, we can go backwards
}
// keep advancing forward to check if there's only separator tokens left until the end
// if so, then include those too in the index range
let mut try_index_forward = valid_index_forward + 1;
while let Some(token) = tokens.get(try_index_forward) {
if !token.is_separator() {
return [valid_index_backward, valid_index_forward];
// keep advancing forward and backward to check whether only separator tokens
// remain up to either end; if so, include those in the index range too
let saved_index = valid_index_forward;
loop {
if valid_index_forward == tokens.len() - 1 {
break;
}
try_index_forward += 1;
valid_index_forward += 1;
if !tokens[valid_index_forward].is_separator() {
valid_index_forward = saved_index;
break;
}
}
[valid_index_backward, try_index_forward - 1]
let saved_index = valid_index_backward;
loop {
if valid_index_backward == 0 {
break;
}
valid_index_backward -= 1;
if !tokens[valid_index_backward].is_separator() {
valid_index_backward = saved_index;
break;
}
}
[valid_index_backward, valid_index_forward]
}
fn get_adjusted_index_forward_for_too_many_words(
@ -158,14 +178,13 @@ pub fn get_adjusted_indices_for_highlights_and_crop_size(
crop_size: usize,
) -> [usize; 2] {
match words_count.cmp(&crop_size) {
Ordering::Less => get_adjusted_indices_for_too_few_words(
Ordering::Equal | Ordering::Less => get_adjusted_indices_for_too_few_words(
tokens,
index_backward,
index_forward,
words_count,
crop_size,
),
Ordering::Equal => [index_backward, index_forward],
Ordering::Greater => [
index_backward,
get_adjusted_index_forward_for_too_many_words(

View file

@ -247,12 +247,22 @@ impl MatchingWords {
// TODO: There is potentially an optimization to be made here
// if we matched a term then we can skip checking it for further iterations?
self.located_matching_words
.iter()
.flat_map(|lw| lw.value.iter().map(move |w| (lw, w)))
.flat_map(|lw| lw.value.iter().map(move |w| (lw, self.word_interner.get(*w))))
.find_map(|(located_words, word)| {
let word = self.word_interner.get(*word);
let [char_count, byte_len] =
match PrefixedOrEquality::new(tph.token.lemma(), word, located_words.is_prefix)
{
@ -368,93 +378,105 @@ impl Debug for MatchingWords {
}
}
// #[cfg(test)]
// pub(crate) mod tests {
// use super::super::super::located_query_terms_from_tokens;
// use super::*;
// use crate::search::new::matches::tests::temp_index_with_documents;
// use crate::search::new::query_term::ExtractedTokens;
// use charabia::{TokenKind, TokenizerBuilder};
// use std::borrow::Cow;
#[cfg(test)]
mod tests {
use super::super::super::located_query_terms_from_tokens;
use super::*;
use crate::index::tests::TempIndex;
use crate::search::new::query_term::ExtractedTokens;
use charabia::{TokenKind, TokenizerBuilder};
use std::borrow::Cow;
// #[test]
// fn matching_words() {
// let temp_index = temp_index_with_documents(None);
// let rtxn = temp_index.read_txn().unwrap();
// let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap();
// let mut builder = TokenizerBuilder::default();
// let tokenizer = builder.build();
// let text = "split this world";
// let tokens = tokenizer.tokenize(text);
// let ExtractedTokens { query_terms, .. } =
// located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap();
// let matching_words = MatchingWords::new(ctx, &query_terms);
/// Creates a fresh `TempIndex` pre-populated with three documents whose `name`
/// fields contain the words used by the tests in this module.
fn temp_index_with_documents() -> TempIndex {
    let index = TempIndex::new();

    let seed_documents = documents!([
        { "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
        { "id": 2, "name": "Westfália" },
        { "id": 3, "name": "Ŵôřlḑôle" },
    ]);
    // Indexing the fixture must succeed, otherwise every test relying on it is meaningless.
    index.add_documents(seed_documents).unwrap();

    index
}
// assert_eq!(
// matching_words.get_matches_and_query_positions(
// &[
// Token {
// kind: TokenKind::Word,
// lemma: Cow::Borrowed("split"),
// char_end: "split".chars().count(),
// byte_end: "split".len(),
// ..Default::default()
// },
// Token {
// kind: TokenKind::Word,
// lemma: Cow::Borrowed("nyc"),
// char_end: "nyc".chars().count(),
// byte_end: "nyc".len(),
// ..Default::default()
// },
// Token {
// kind: TokenKind::Word,
// lemma: Cow::Borrowed("world"),
// char_end: "world".chars().count(),
// byte_end: "world".len(),
// ..Default::default()
// },
// Token {
// kind: TokenKind::Word,
// lemma: Cow::Borrowed("worlded"),
// char_end: "worlded".chars().count(),
// byte_end: "worlded".len(),
// ..Default::default()
// },
// Token {
// kind: TokenKind::Word,
// lemma: Cow::Borrowed("thisnew"),
// char_end: "thisnew".chars().count(),
// byte_end: "thisnew".len(),
// ..Default::default()
// }
// ],
// text
// ),
// (
// vec![
// Match {
// char_count: 5,
// byte_len: 5,
// position: MatchPosition::Word { word_position: 0, token_position: 0 }
// },
// Match {
// char_count: 5,
// byte_len: 5,
// position: MatchPosition::Word { word_position: 2, token_position: 2 }
// },
// Match {
// char_count: 5,
// byte_len: 5,
// position: MatchPosition::Word { word_position: 3, token_position: 3 }
// }
// ],
// vec![
// QueryPosition { range: [0, 0], index: 0 },
// QueryPosition { range: [2, 2], index: 1 },
// QueryPosition { range: [2, 2], index: 2 }
// ]
// )
// );
// }
// }
/// Runs the query "split this world" against a handful of hand-built word tokens and
/// checks both the reported matches and the query positions they are mapped to.
///
/// Per the expected values below, "split", "world" and "worlded" are reported as
/// matches (each with a 5-char / 5-byte length), while "nyc" and "thisnew" are not.
#[test]
fn matching_words() {
    // Builds a word token whose end offsets are derived from the lemma itself.
    let word_token = |lemma: &'static str| Token {
        kind: TokenKind::Word,
        lemma: Cow::Borrowed(lemma),
        char_end: lemma.chars().count(),
        byte_end: lemma.len(),
        ..Default::default()
    };

    let index = temp_index_with_documents();
    let rtxn = index.read_txn().unwrap();
    let mut ctx = SearchContext::new(&index, &rtxn).unwrap();

    let text = "split this world";
    let mut tokenizer_builder = TokenizerBuilder::default();
    let tokenizer = tokenizer_builder.build();
    let ExtractedTokens { query_terms, .. } =
        located_query_terms_from_tokens(&mut ctx, tokenizer.tokenize(text), None).unwrap();
    let matching_words = MatchingWords::new(ctx, &query_terms);

    let document_tokens = [
        word_token("split"),
        word_token("nyc"),
        word_token("world"),
        word_token("worlded"),
        word_token("thisnew"),
    ];

    let expected_matches = vec![
        Match {
            char_count: 5,
            byte_len: 5,
            position: MatchPosition::Word { word_position: 0, token_position: 0 },
        },
        Match {
            char_count: 5,
            byte_len: 5,
            position: MatchPosition::Word { word_position: 2, token_position: 2 },
        },
        Match {
            char_count: 5,
            byte_len: 5,
            position: MatchPosition::Word { word_position: 3, token_position: 3 },
        },
    ];
    let expected_query_positions = vec![
        QueryPosition { range: [0, 0], index: 0 },
        QueryPosition { range: [2, 2], index: 1 },
        QueryPosition { range: [2, 2], index: 2 },
    ];

    assert_eq!(
        matching_words.get_matches_and_query_positions(&document_tokens, text),
        (expected_matches, expected_query_positions)
    );
}
}

View file

@ -200,7 +200,7 @@ mod tests {
format_options: Option<FormatOptions>,
text: &str,
query: &str,
expected_text: &str,
expected_maybe_text: Option<&str>,
) {
let temp_index = TempIndex::new();
@ -216,7 +216,28 @@ mod tests {
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query);
let mut matcher = builder.build(text, None);
assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
assert_eq!(
matcher.get_formatted_text(format_options),
expected_maybe_text.map(|v| v.to_string())
);
}
/// Expected formatted outputs for one (text, query) pair under the format
/// option combinations that are expected to produce a result.
struct FormatVariations<'a> {
    /// Expected output with highlighting enabled and a crop size of 2.
    highlight_with_crop: Option<&'a str>,
    /// Expected output with highlighting enabled and no cropping.
    highlight: Option<&'a str>,
    /// Expected output with highlighting disabled and a crop size of 2.
    crop: Option<&'a str>,
}

impl<'a> FormatVariations<'a> {
    /// Pairs each format option combination with its expected output.
    ///
    /// The two combinations that request neither highlighting nor cropping
    /// (no options at all, or both turned off) are always paired with `None`.
    fn get(&self) -> [(Option<FormatOptions>, Option<&'a str>); 5] {
        let highlight_and_crop = FormatOptions { highlight: true, crop: Some(2) };
        let highlight_only = FormatOptions { highlight: true, crop: None };
        let crop_only = FormatOptions { highlight: false, crop: Some(2) };
        let neither = FormatOptions { highlight: false, crop: None };

        [
            (None, None),
            (Some(highlight_and_crop), self.highlight_with_crop),
            (Some(highlight_only), self.highlight),
            (Some(crop_only), self.crop),
            (Some(neither), None),
        ]
    }
}
/// "Dei store fiskane eta dei små — dei liger under som minst förmå."
@ -225,77 +246,66 @@ mod tests {
fn rename_me_with_base_text(
format_options: Option<FormatOptions>,
query: &str,
expected_text: &str,
expected_maybe_text: Option<&str>,
) {
rename_me(
format_options,
"Dei store fiskane eta dei små — dei liger under som minst förmå.",
query,
expected_text,
expected_maybe_text,
);
}
#[test]
fn phrase_highlight_bigger_than_crop() {
rename_me_with_base_text(
Some(FormatOptions { highlight: true, crop: Some(1) }),
"\"dei liger\"",
"…<em>dei</em>…",
);
fn empty_query() {
for (format_options, expected_maybe_text) in (FormatVariations {
highlight_with_crop: Some("Dei store…"),
highlight: None,
crop: Some("Dei store…"),
}
.get())
{
rename_me_with_base_text(format_options, "", expected_maybe_text);
}
}
#[test]
fn phrase_highlight_same_size_as_crop() {
rename_me_with_base_text(
Some(FormatOptions { highlight: true, crop: Some(2) }),
"\"dei liger\"",
"…<em>dei liger</em>…",
);
}
#[test]
fn phrase_highlight_crop_middle() {
rename_me_with_base_text(
Some(FormatOptions { highlight: true, crop: Some(4) }),
"\"dei liger\"",
"…små — <em>dei liger</em> under…",
);
}
#[test]
fn phrase_highlight_crop_end() {
rename_me_with_base_text(
Some(FormatOptions { highlight: true, crop: Some(4) }),
"\"minst förmå\"",
"…under som <em>minst förmå</em>.",
);
}
#[test]
fn phrase_highlight_crop_beginning() {
rename_me_with_base_text(
Some(FormatOptions { highlight: true, crop: Some(4) }),
"\"Dei store\"",
"<em>Dei store</em> fiskane eta…",
);
fn only_separators() {
for (format_options, expected_maybe_text) in (FormatVariations {
highlight_with_crop: Some(":-…"),
highlight: None,
crop: Some(":-…"),
}
.get())
{
rename_me(format_options, ":-)", ":-)", expected_maybe_text);
}
}
#[test]
fn highlight_end() {
rename_me_with_base_text(
Some(FormatOptions { highlight: true, crop: None }),
"minst förmå",
"Dei store fiskane eta dei små — dei liger under som <em>minst</em> <em>förmå</em>.",
);
// TODO: Why is "förmå" marked as prefix in located matching words?
for (format_options, expected_maybe_text) in (FormatVariations {
highlight_with_crop: Some("…<em>minst</em> <em>förmå</em>."),
highlight: Some("Dei store fiskane eta dei små — dei liger under som <em>minst</em> <em>förmå</em>."),
crop: Some("…minst förmå."),
}
.get()) {
rename_me_with_base_text(format_options, "minst förmå", expected_maybe_text);
}
}
#[test]
fn highlight_beginning_and_middle() {
rename_me_with_base_text(
Some(FormatOptions { highlight: true, crop: None }),
"Dei store",
"<em>Dei</em> <em>store</em> fiskane eta <em>dei</em> små — <em>dei</em> liger under som minst förmå.",
);
// TODO: Why is "store" marked as prefix in located matching words?
for (format_options, expected_maybe_text) in (FormatVariations {
highlight_with_crop: Some("<em>Dei</em> <em>store</em>…"),
highlight: Some("<em>Dei</em> <em>store</em> fiskane eta <em>dei</em> små — <em>dei</em> liger under som minst förmå."),
crop: Some("Dei store…"),
}
.get()) {
rename_me_with_base_text(format_options, "Dei store", expected_maybe_text);
}
}
#[test]
@ -306,291 +316,185 @@ mod tests {
// `milli::search::new::query_term::QueryTerm::all_computed_derivations` might be at fault here
// interned words = ["forma"]
rename_me(
Some(FormatOptions { highlight: true, crop: None }),
"altså, förmå, på en måte",
"fo",
"altså, <em>förmå</em>, på en måte",
);
for (format_options, expected_maybe_text) in (FormatVariations {
highlight_with_crop: Some("…<em>förmå</em>, på…"),
highlight: Some("altså, <em>förmå</em>, på en måte"),
crop: Some("…förmå, på…"),
}
.get())
{
rename_me(format_options, "altså, förmå, på en måte", "fo", expected_maybe_text);
}
// interned words = ["fo", "forma"]
rename_me(
Some(FormatOptions { highlight: true, crop: None }),
"altså, fo förmå, på en måte",
"fo",
"altså, <em>fo</em> <em>fö</em>rmå, på en måte",
);
for (format_options, expected_maybe_text) in (FormatVariations {
highlight_with_crop: Some("…<em>fo</em> <em>fö</em>rmå…"),
highlight: Some("altså, <em>fo</em> <em>fö</em>rmå, på en måte"),
crop: Some("…fo förmå…"),
}
.get())
{
rename_me(format_options, "altså, fo förmå, på en måte", "fo", expected_maybe_text);
}
}
#[test]
fn partial_match_end() {
rename_me(
Some(FormatOptions { highlight: true, crop: None }),
"förmå, på en måte",
"fo",
"<em>förmå</em>, på en måte",
);
for (format_options, expected_maybe_text) in (FormatVariations {
highlight_with_crop: Some("<em>förmå</em>, på…"),
highlight: Some("<em>förmå</em>, på en måte"),
crop: Some("förmå, på…"),
}
.get())
{
rename_me(format_options, "förmå, på en måte", "fo", expected_maybe_text);
}
rename_me(
Some(FormatOptions { highlight: true, crop: None }),
"fo förmå, på en måte",
"fo",
"<em>fo</em> <em>fö</em>rmå, på en måte",
);
for (format_options, expected_maybe_text) in (FormatVariations {
highlight_with_crop: Some("<em>fo</em> <em>fö</em>rmå…"),
highlight: Some("<em>fo</em> <em>fö</em>rmå, på en måte"),
crop: Some("fo förmå…"),
}
.get())
{
rename_me(format_options, "fo förmå, på en måte", "fo", expected_maybe_text);
}
}
#[test]
fn partial_match_beginning() {
rename_me(
Some(FormatOptions { highlight: true, crop: None }),
"altså, förmå",
"fo",
"altså, <em>förmå</em>",
);
for (format_options, expected_maybe_text) in (FormatVariations {
highlight_with_crop: Some("altså, <em>förmå</em>"),
highlight: Some("altså, <em>förmå</em>"),
crop: Some("altså, förmå"),
}
.get())
{
rename_me(format_options, "altså, förmå", "fo", expected_maybe_text);
}
rename_me(
Some(FormatOptions { highlight: true, crop: None }),
"altså, fo förmå",
"fo",
"altså, <em>fo</em> <em>fö</em>rmå",
for (format_options, expected_maybe_text) in (FormatVariations {
highlight_with_crop: Some("…<em>fo</em> <em>fö</em>rmå"),
highlight: Some("altså, <em>fo</em> <em>fö</em>rmå"),
crop: Some("…fo förmå"),
}
.get())
{
rename_me(format_options, "altså, fo förmå", "fo", expected_maybe_text);
}
}
/// Matching "minst" near the end of a text that finishes with separators:
/// the cropped variants are expected to keep the trailing ". , ;".
#[test]
fn separator_at_end() {
    let text = "; , — dei liger under som minst förmå. , ;";
    let variations = FormatVariations {
        highlight_with_crop: Some("…<em>minst</em> förmå. , ;"),
        highlight: Some("; , — dei liger under som <em>minst</em> förmå. , ;"),
        crop: Some("…minst förmå. , ;"),
    };

    for (format_options, expected_maybe_text) in variations.get() {
        rename_me(format_options, text, "minst", expected_maybe_text);
    }
}
/// Matching "dei" near the start of a text that begins with separators:
/// the cropped variants are expected to keep the leading "; , — ".
#[test]
fn separator_at_beginning() {
    let text = "; , — dei liger under som minst förmå. , ;";
    let variations = FormatVariations {
        highlight_with_crop: Some("; , — <em>dei</em> liger…"),
        highlight: Some("; , — <em>dei</em> liger under som minst förmå. , ;"),
        crop: Some("; , — dei liger…"),
    };

    for (format_options, expected_maybe_text) in variations.get() {
        rename_me(format_options, text, "dei", expected_maybe_text);
    }
}
/// A quoted phrase query is expected to be highlighted as a single `<em>` span
/// and, when cropping, to be the only text kept.
#[test]
fn phrase() {
    let query = "\"dei liger\"";
    let variations = FormatVariations {
        highlight_with_crop: Some("…<em>dei liger</em>…"),
        highlight: Some(
            "Dei store fiskane eta dei små — <em>dei liger</em> under som minst förmå.",
        ),
        crop: Some("…dei liger…"),
    };

    for (format_options, expected_maybe_text) in variations.get() {
        rename_me_with_base_text(format_options, query, expected_maybe_text);
    }
}
/// With highlighting on and a crop size of 1, a two-word phrase is expected
/// to be cut down to its first word only.
#[test]
fn phrase_highlight_bigger_than_crop() {
    let format_options = FormatOptions { highlight: true, crop: Some(1) };
    let query = "\"dei liger\"";
    let expected = "…<em>dei</em>…";

    rename_me_with_base_text(Some(format_options), query, Some(expected));
}
// #[test]
// fn format_identity() {
// let temp_index = temp_index_with_documents(None);
// let rtxn = temp_index.read_txn().unwrap();
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
// let format_options = Some(FormatOptions { highlight: false, crop: None });
/// With highlighting off and a crop size of 1, a two-word phrase is expected
/// to be cut down to its first word only.
#[test]
fn phrase_bigger_than_crop() {
    let format_options = FormatOptions { highlight: false, crop: Some(1) };
    let query = "\"dei liger\"";
    let expected = "…dei…";

    rename_me_with_base_text(Some(format_options), query, Some(expected));
}
// let test_values = [
// // Text without any match.
// "A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
// // Text containing all matches.
// "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
// // Text containing some matches.
// "Natalie risk her future to build a world with the boy she loves."
// ];
/// With highlighting on and a crop size of 4, a phrase in the middle of the
/// text is expected to be surrounded by context on both sides.
#[test]
fn phrase_highlight_crop_middle() {
    let format_options = FormatOptions { highlight: true, crop: Some(4) };
    let query = "\"dei liger\"";
    let expected = "…små — <em>dei liger</em> under…";

    rename_me_with_base_text(Some(format_options), query, Some(expected));
}
// for text in test_values {
// let mut matcher = builder.build(text, None);
// // no crop and no highlight should return complete text.
// assert_eq!(matcher.get_formatted_text(format_options), None);
// }
// }
/// With highlighting off and a crop size of 4, a phrase in the middle of the
/// text is expected to be surrounded by context on both sides.
#[test]
fn phrase_crop_middle() {
    let format_options = FormatOptions { highlight: false, crop: Some(4) };
    let query = "\"dei liger\"";
    let expected = "…små — dei liger under…";

    rename_me_with_base_text(Some(format_options), query, Some(expected));
}
// #[test]
// fn format_highlight() {
// let temp_index = temp_index_with_documents(None);
// let rtxn = temp_index.read_txn().unwrap();
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
// let format_options = Some(FormatOptions { highlight: true, crop: None });
/// With highlighting on and a crop size of 4, a phrase at the end of the text
/// is expected to be preceded by context and followed by the final ".".
#[test]
fn phrase_highlight_crop_end() {
    let format_options = FormatOptions { highlight: true, crop: Some(4) };
    let query = "\"minst förmå\"";
    let expected = "…under som <em>minst förmå</em>.";

    rename_me_with_base_text(Some(format_options), query, Some(expected));
}
// let test_values = [
// // empty text.
// ["", ""],
// // text containing only separators.
// [":-)", ":-)"],
// // Text without any match.
// ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
// "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"],
// // Text containing all matches.
// ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
// "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."],
// // Text containing some matches.
// ["Natalie risk her future to build a world with the boy she loves.",
// "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves."],
// ];
/// With highlighting off and a crop size of 4, a phrase at the end of the text
/// is expected to be preceded by context and followed by the final ".".
#[test]
fn phrase_crop_end() {
    let format_options = FormatOptions { highlight: false, crop: Some(4) };
    let query = "\"minst förmå\"";
    let expected = "…under som minst förmå.";

    rename_me_with_base_text(Some(format_options), query, Some(expected));
}
// for [text, expected_text] in test_values {
// let mut matcher = builder.build(text, None);
// // no crop should return complete text with highlighted matches.
// assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
// }
// }
// #[test]
// fn highlight_unicode() {
// let temp_index = temp_index_with_documents(None);
// let rtxn = temp_index.read_txn().unwrap();
// let format_options = Some(FormatOptions { highlight: true, crop: None });
// let test_values = [
// // Text containing prefix match.
// ["world", "Ŵôřlḑôle", "<em>Ŵôřlḑ</em>ôle"],
// // Text containing unicode match.
// ["world", "Ŵôřlḑ", "<em>Ŵôřlḑ</em>"],
// // Text containing unicode match.
// ["westfali", "Westfália", "<em>Westfáli</em>a"],
// ];
// for [query, text, expected_text] in test_values {
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query);
// let mut matcher = builder.build(text, None);
// // no crop should return complete text with highlighted matches.
// assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
// }
// }
// #[test]
// fn format_crop() {
// let temp_index = temp_index_with_documents(None);
// let rtxn = temp_index.read_txn().unwrap();
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
// let format_options = Some(FormatOptions { highlight: false, crop: Some(10) });
// let test_values = [
// // empty text.
// // ["", ""],
// // text containing only separators.
// // [":-)", ":-)"],
// // Text without any match.
// ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
// "A quick brown fox can not jump 32 feet, right…"],
// // Text without any match starting by a separator.
// ["(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)",
// "(A quick brown fox can not jump 32 feet, right…" ],
// // Test phrase propagation
// ["Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.",
// "…Split The World is a book written by Emily Henry…"],
// // Text containing some matches.
// ["Natalie risk her future to build a world with the boy she loves.",
// "…future to build a world with the boy she loves."],
// // Text containing all matches.
// ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
// "…she loves. Emily Henry: The Love That Split The World."],
// // Text containing a match unordered and a match ordered.
// ["The world split void void void void void void void void void split the world void void",
// "…void void void void void split the world void void"],
// // Text containing matches with different density.
// ["split void the void void world void void void void void void void void void void split the world void void",
// "…void void void void void split the world void void"],
// ["split split split split split split void void void void void void void void void void split the world void void",
// "…void void void void void split the world void void"]
// ];
// for [text, expected_text] in test_values {
// let mut matcher = builder.build(text, None);
// // no crop should return complete text with highlighted matches.
// assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
// }
// }
// #[test]
// fn format_highlight_crop() {
// let temp_index = temp_index_with_documents(None);
// let rtxn = temp_index.read_txn().unwrap();
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
// let format_options = Some(FormatOptions { highlight: true, crop: Some(10) });
// let test_values = [
// // empty text.
// ["", ""],
// // text containing only separators.
// [":-)", ":-)"],
// // Text without any match.
// ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
// "A quick brown fox can not jump 32 feet, right…"],
// // Text containing some matches.
// ["Natalie risk her future to build a world with the boy she loves.",
// "…future to build a <em>world</em> with <em>the</em> boy she loves."],
// // Text containing all matches.
// ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
// "…she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."],
// // Text containing a match unordered and a match ordered.
// ["The world split void void void void void void void void void split the world void void",
// "…void void void void void <em>split</em> <em>the</em> <em>world</em> void void"]
// ];
// for [text, expected_text] in test_values {
// let mut matcher = builder.build(text, None);
// // no crop should return complete text with highlighted matches.
// assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
// }
// }
// #[test]
// fn format_highlight_crop_phrase_query() {
// //! testing: https://github.com/meilisearch/meilisearch/issues/3975
// let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
// let temp_index = temp_index_with_documents(Some(documents!([
// { "id": 1, "text": text }
// ])));
// let rtxn = temp_index.read_txn().unwrap();
// let format_options = Some(FormatOptions { highlight: true, crop: Some(10) });
// let test_values = [
// // should return 10 words with a marker at the start as well the end, and the highlighted matches.
// ["\"the world\"",
// "…the power to split <em>the world</em> between those who embraced…"],
// // should highlight "those" and the phrase "and those".
// ["those \"and those\"",
// "…world between <em>those</em> who embraced progress <em>and those</em> who resisted…"],
// ["\"The groundbreaking invention had the power to split the world\"",
// "<em>The groundbreaking invention had the power to split the world</em>…"],
// ["\"The groundbreaking invention had the power to split the world between those\"",
// "<em>The groundbreaking invention had the power to split the world</em>…"],
// ["\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"",
// "…between those who <em>embraced progress and those who resisted change</em>!"],
// ["\"groundbreaking invention\" \"split the world between\"",
// "…<em>groundbreaking invention</em> had the power to <em>split the world between</em>…"],
// ["\"groundbreaking invention\" \"had the power to split the world between those\"",
// "…<em>invention</em> <em>had the power to split the world between those</em>…"],
// ];
// for [query, expected_text] in test_values {
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query);
// let mut matcher = builder.build(text, None);
// assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
// }
// }
// #[test]
// fn smaller_crop_size() {
// //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
// let temp_index = temp_index_with_documents(None);
// let rtxn = temp_index.read_txn().unwrap();
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
// let text = "void void split the world void void.";
// let mut matcher = builder.build(text, None);
// let test_values = [
// // set a smaller crop size
// // because crop size < query size, partially format matches.
// (2, "…split the…"),
// // set a smaller crop size
// // because crop size < query size, partially format matches.
// (1, "…split…"),
// // set crop size to 0
// // because crop size is 0, crop is ignored.
// (0, "void void split the world void void."),
// ];
// for (crop_size, expected_text) in test_values {
// // set a smaller crop size
// let format_options = Some(FormatOptions { highlight: false, crop: Some(crop_size) });
// assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
// }
// }
// #[test]
// fn partial_matches() {
// let temp_index = temp_index_with_documents(None);
// let rtxn = temp_index.read_txn().unwrap();
// let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "the \"t he\" door \"do or\"");
// let format_options = Some(FormatOptions { highlight: true, crop: None });
// let text = "the do or die can't be he do and or isn't he";
// let mut matcher = builder.build(text, None);
// assert_eq!(
// matcher.get_formatted_text(format_options),
// Some(
// "<em>the</em> <em>do or</em> die can't be he do and or isn'<em>t he</em>"
// .to_string()
// )
// );
// }
/// With highlighting on and a crop size of 4, a phrase at the start of the
/// text is expected to be followed by context, with no leading crop marker.
#[test]
fn phrase_highlight_crop_beginning() {
    let format_options = FormatOptions { highlight: true, crop: Some(4) };
    let query = "\"Dei store\"";
    let expected = "<em>Dei store</em> fiskane eta…";

    rename_me_with_base_text(Some(format_options), query, Some(expected));
}
}