diff --git a/crates/milli/.tmp4e121b/data.mdb b/crates/milli/.tmp4e121b/data.mdb deleted file mode 100644 index f6705d4f1..000000000 Binary files a/crates/milli/.tmp4e121b/data.mdb and /dev/null differ diff --git a/crates/milli/.tmp4e121b/lock.mdb b/crates/milli/.tmp4e121b/lock.mdb deleted file mode 100644 index b4ab05270..000000000 Binary files a/crates/milli/.tmp4e121b/lock.mdb and /dev/null differ diff --git a/crates/milli/.tmpNxMsye/data.mdb b/crates/milli/.tmpNxMsye/data.mdb deleted file mode 100644 index ea920733d..000000000 Binary files a/crates/milli/.tmpNxMsye/data.mdb and /dev/null differ diff --git a/crates/milli/.tmpNxMsye/lock.mdb b/crates/milli/.tmpNxMsye/lock.mdb deleted file mode 100644 index abe89541a..000000000 Binary files a/crates/milli/.tmpNxMsye/lock.mdb and /dev/null differ diff --git a/crates/milli/src/search/new/matches/adjust_indices.rs b/crates/milli/src/search/new/matches/adjust_indices.rs index b7d9ad793..6c5df2ddf 100644 --- a/crates/milli/src/search/new/matches/adjust_indices.rs +++ b/crates/milli/src/search/new/matches/adjust_indices.rs @@ -33,8 +33,8 @@ fn get_adjusted_indices_for_too_few_words( let mut is_index_backwards_at_hard_separator = false; let mut is_index_forwards_at_hard_separator = false; - // false + ends reached because TODO - let mut is_crop_size_or_both_ends_reached = is_end_reached && is_beginning_reached; + let mut is_crop_size_or_both_ends_reached = + words_count == crop_size || (is_end_reached && is_beginning_reached); let mut dir = Direction::Forwards; @@ -108,18 +108,38 @@ fn get_adjusted_indices_for_too_few_words( // 2. if forwards index reached a hard separator and backwards is currently hard, we can go backwards } - // keep advancing forward to check if there's only separator tokens left until the end - // if so, then include those too in the index range - let mut try_index_forward = valid_index_forward + 1; - while let Some(token) = tokens.get(try_index_forward) { - if !token.is_separator() { - return [valid_index_backward, valid_index_forward]; + // keep advancing forward and backward to check if there's only separator tokens + // left until the end if so, then include those too in the index range + + let saved_index = valid_index_forward; + loop { + if valid_index_forward == tokens.len() - 1 { + break; } - try_index_forward += 1; + valid_index_forward += 1; + + if !tokens[valid_index_forward].is_separator() { + valid_index_forward = saved_index; + break; + } } - [valid_index_backward, try_index_forward - 1] + let saved_index = valid_index_backward; + loop { + if valid_index_backward == 0 { + break; + } + + valid_index_backward -= 1; + + if !tokens[valid_index_backward].is_separator() { + valid_index_backward = saved_index; + break; + } + } + + [valid_index_backward, valid_index_forward] } fn get_adjusted_index_forward_for_too_many_words( @@ -158,14 +178,13 @@ pub fn get_adjusted_indices_for_highlights_and_crop_size( crop_size: usize, ) -> [usize; 2] { match words_count.cmp(&crop_size) { - Ordering::Less => get_adjusted_indices_for_too_few_words( + Ordering::Equal | Ordering::Less => get_adjusted_indices_for_too_few_words( tokens, index_backward, index_forward, words_count, crop_size, ), - Ordering::Equal => [index_backward, index_forward], Ordering::Greater => [ index_backward, get_adjusted_index_forward_for_too_many_words( diff --git a/crates/milli/src/search/new/matches/matching_words.rs b/crates/milli/src/search/new/matches/matching_words.rs index ab7f90f05..3edc3eb38 100644 --- a/crates/milli/src/search/new/matches/matching_words.rs +++ b/crates/milli/src/search/new/matches/matching_words.rs @@ -247,12 +247,22 @@ impl MatchingWords { // TODO: There is potentially an optimization to be made here // if we matched a term then we can skip checking it for further iterations? + println!( + "{:?}", + self.located_matching_words + .iter() + .flat_map(|lw| lw.value.iter().map(move |w| ( + lw.is_prefix, + lw.original_char_count, + self.word_interner.get(*w) + ))) + .collect::>() + ); + self.located_matching_words .iter() - .flat_map(|lw| lw.value.iter().map(move |w| (lw, w))) + .flat_map(|lw| lw.value.iter().map(move |w| (lw, self.word_interner.get(*w)))) .find_map(|(located_words, word)| { - let word = self.word_interner.get(*word); - let [char_count, byte_len] = match PrefixedOrEquality::new(tph.token.lemma(), word, located_words.is_prefix) { @@ -368,93 +378,105 @@ impl Debug for MatchingWords { } } -// #[cfg(test)] -// pub(crate) mod tests { -// use super::super::super::located_query_terms_from_tokens; -// use super::*; -// use crate::search::new::matches::tests::temp_index_with_documents; -// use crate::search::new::query_term::ExtractedTokens; -// use charabia::{TokenKind, TokenizerBuilder}; -// use std::borrow::Cow; +#[cfg(test)] +mod tests { + use super::super::super::located_query_terms_from_tokens; + use super::*; + use crate::index::tests::TempIndex; + use crate::search::new::query_term::ExtractedTokens; + use charabia::{TokenKind, TokenizerBuilder}; + use std::borrow::Cow; -// #[test] -// fn matching_words() { -// let temp_index = temp_index_with_documents(None); -// let rtxn = temp_index.read_txn().unwrap(); -// let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap(); -// let mut builder = TokenizerBuilder::default(); -// let tokenizer = builder.build(); -// let text = "split this world"; -// let tokens = tokenizer.tokenize(text); -// let ExtractedTokens { query_terms, .. } = -// located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap(); -// let matching_words = MatchingWords::new(ctx, &query_terms); + fn temp_index_with_documents() -> TempIndex { + let temp_index = TempIndex::new(); + temp_index + .add_documents(documents!([ + { "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" }, + { "id": 2, "name": "Westfália" }, + { "id": 3, "name": "Ŵôřlḑôle" }, + ])) + .unwrap(); + temp_index + } -// assert_eq!( -// matching_words.get_matches_and_query_positions( -// &[ -// Token { -// kind: TokenKind::Word, -// lemma: Cow::Borrowed("split"), -// char_end: "split".chars().count(), -// byte_end: "split".len(), -// ..Default::default() -// }, -// Token { -// kind: TokenKind::Word, -// lemma: Cow::Borrowed("nyc"), -// char_end: "nyc".chars().count(), -// byte_end: "nyc".len(), -// ..Default::default() -// }, -// Token { -// kind: TokenKind::Word, -// lemma: Cow::Borrowed("world"), -// char_end: "world".chars().count(), -// byte_end: "world".len(), -// ..Default::default() -// }, -// Token { -// kind: TokenKind::Word, -// lemma: Cow::Borrowed("worlded"), -// char_end: "worlded".chars().count(), -// byte_end: "worlded".len(), -// ..Default::default() -// }, -// Token { -// kind: TokenKind::Word, -// lemma: Cow::Borrowed("thisnew"), -// char_end: "thisnew".chars().count(), -// byte_end: "thisnew".len(), -// ..Default::default() -// } -// ], -// text -// ), -// ( -// vec![ -// Match { -// char_count: 5, -// byte_len: 5, -// position: MatchPosition::Word { word_position: 0, token_position: 0 } -// }, -// Match { -// char_count: 5, -// byte_len: 5, -// position: MatchPosition::Word { word_position: 2, token_position: 2 } -// }, -// Match { -// char_count: 5, -// byte_len: 5, -// position: MatchPosition::Word { word_position: 3, token_position: 3 } -// } -// ], -// vec![ -// QueryPosition { range: [0, 0], index: 0 }, -// QueryPosition { range: [2, 2], index: 1 }, -// QueryPosition { range: [2, 2], index: 2 } -// ] -// ) -// ); -// } -// } + #[test] + fn matching_words() { + let temp_index = temp_index_with_documents(); + let rtxn = temp_index.read_txn().unwrap(); + let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap(); + let mut builder = TokenizerBuilder::default(); + let tokenizer = builder.build(); + let text = "split this world"; + let tokens = tokenizer.tokenize(text); + let ExtractedTokens { query_terms, .. } = + located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap(); + let matching_words = MatchingWords::new(ctx, &query_terms); + + assert_eq!( + matching_words.get_matches_and_query_positions( + &[ + Token { + kind: TokenKind::Word, + lemma: Cow::Borrowed("split"), + char_end: "split".chars().count(), + byte_end: "split".len(), + ..Default::default() + }, + Token { + kind: TokenKind::Word, + lemma: Cow::Borrowed("nyc"), + char_end: "nyc".chars().count(), + byte_end: "nyc".len(), + ..Default::default() + }, + Token { + kind: TokenKind::Word, + lemma: Cow::Borrowed("world"), + char_end: "world".chars().count(), + byte_end: "world".len(), + ..Default::default() + }, + Token { + kind: TokenKind::Word, + lemma: Cow::Borrowed("worlded"), + char_end: "worlded".chars().count(), + byte_end: "worlded".len(), + ..Default::default() + }, + Token { + kind: TokenKind::Word, + lemma: Cow::Borrowed("thisnew"), + char_end: "thisnew".chars().count(), + byte_end: "thisnew".len(), + ..Default::default() + } + ], + text + ), + ( + vec![ + Match { + char_count: 5, + byte_len: 5, + position: MatchPosition::Word { word_position: 0, token_position: 0 } + }, + Match { + char_count: 5, + byte_len: 5, + position: MatchPosition::Word { word_position: 2, token_position: 2 } + }, + Match { + char_count: 5, + byte_len: 5, + position: MatchPosition::Word { word_position: 3, token_position: 3 } + } + ], + vec![ + QueryPosition { range: [0, 0], index: 0 }, + QueryPosition { range: [2, 2], index: 1 }, + QueryPosition { range: [2, 2], index: 2 } + ] + ) + ); + } +} diff --git a/crates/milli/src/search/new/matches/mod.rs b/crates/milli/src/search/new/matches/mod.rs index bab82da8c..f47582af7 100644 --- a/crates/milli/src/search/new/matches/mod.rs +++ b/crates/milli/src/search/new/matches/mod.rs @@ -200,7 +200,7 @@ mod tests { format_options: Option, text: &str, query: &str, - expected_text: &str, + expected_maybe_text: Option<&str>, ) { let temp_index = TempIndex::new(); @@ -216,7 +216,28 @@ mod tests { let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query); let mut matcher = builder.build(text, None); - assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string())); + assert_eq!( + matcher.get_formatted_text(format_options), + expected_maybe_text.map(|v| v.to_string()) + ); + } + + struct FormatVariations<'a> { + highlight_with_crop: Option<&'a str>, + highlight: Option<&'a str>, + crop: Option<&'a str>, + } + + impl<'a> FormatVariations<'a> { + fn get(&self) -> [(Option, Option<&'a str>); 5] { + [ + (None, None), + (Some(FormatOptions { highlight: true, crop: Some(2) }), self.highlight_with_crop), + (Some(FormatOptions { highlight: true, crop: None }), self.highlight), + (Some(FormatOptions { highlight: false, crop: Some(2) }), self.crop), + (Some(FormatOptions { highlight: false, crop: None }), None), + ] + } } /// "Dei store fiskane eta dei små — dei liger under som minst förmå." @@ -225,77 +246,66 @@ mod tests { fn rename_me_with_base_text( format_options: Option, query: &str, - expected_text: &str, + expected_maybe_text: Option<&str>, ) { rename_me( format_options, "Dei store fiskane eta dei små — dei liger under som minst förmå.", query, - expected_text, + expected_maybe_text, ); } #[test] - fn phrase_highlight_bigger_than_crop() { - rename_me_with_base_text( - Some(FormatOptions { highlight: true, crop: Some(1) }), - "\"dei liger\"", - "…dei…", - ); + fn empty_query() { + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("Dei store…"), + highlight: None, + crop: Some("Dei store…"), + } + .get()) + { + rename_me_with_base_text(format_options, "", expected_maybe_text); + } } #[test] - fn phrase_highlight_same_size_as_crop() { - rename_me_with_base_text( - Some(FormatOptions { highlight: true, crop: Some(2) }), - "\"dei liger\"", - "…dei liger…", - ); - } - - #[test] - fn phrase_highlight_crop_middle() { - rename_me_with_base_text( - Some(FormatOptions { highlight: true, crop: Some(4) }), - "\"dei liger\"", - "…små — dei liger under…", - ); - } - - #[test] - fn phrase_highlight_crop_end() { - rename_me_with_base_text( - Some(FormatOptions { highlight: true, crop: Some(4) }), - "\"minst förmå\"", - "…under som minst förmå.", - ); - } - - #[test] - fn phrase_highlight_crop_beginning() { - rename_me_with_base_text( - Some(FormatOptions { highlight: true, crop: Some(4) }), - "\"Dei store\"", - "Dei store fiskane eta…", - ); + fn only_separators() { + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some(":-…"), + highlight: None, + crop: Some(":-…"), + } + .get()) + { + rename_me(format_options, ":-)", ":-)", expected_maybe_text); + } } #[test] fn highlight_end() { - rename_me_with_base_text( - Some(FormatOptions { highlight: true, crop: None }), - "minst förmå", - "Dei store fiskane eta dei små — dei liger under som minst förmå.", - ); + // TODO: Why is "förmå" marked as prefix in located matching words? + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("…minst förmå."), + highlight: Some("Dei store fiskane eta dei små — dei liger under som minst förmå."), + crop: Some("…minst förmå."), + } + .get()) { + rename_me_with_base_text(format_options, "minst förmå", expected_maybe_text); + } } #[test] fn highlight_beginning_and_middle() { - rename_me_with_base_text( - Some(FormatOptions { highlight: true, crop: None }), - "Dei store", - "Dei store fiskane eta dei små — dei liger under som minst förmå.", - ); + // TODO: Why is "store" marked as prefix in located matching words? + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("Dei store…"), + highlight: Some("Dei store fiskane eta dei små — dei liger under som minst förmå."), + crop: Some("Dei store…"), + } + .get()) { + rename_me_with_base_text(format_options, "Dei store", expected_maybe_text); + } } #[test] @@ -306,291 +316,185 @@ mod tests { // `milli::search::new::query_term::QueryTerm::all_computed_derivations` might be at fault here // interned words = ["forma"] - rename_me( - Some(FormatOptions { highlight: true, crop: None }), - "altså, förmå, på en måte", - "fo", - "altså, förmå, på en måte", - ); + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("…förmå, på…"), + highlight: Some("altså, förmå, på en måte"), + crop: Some("…förmå, på…"), + } + .get()) + { + rename_me(format_options, "altså, förmå, på en måte", "fo", expected_maybe_text); + } // interned words = ["fo", "forma"] - rename_me( - Some(FormatOptions { highlight: true, crop: None }), - "altså, fo förmå, på en måte", - "fo", - "altså, fo rmå, på en måte", - ); + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("…fo rmå…"), + highlight: Some("altså, fo rmå, på en måte"), + crop: Some("…fo förmå…"), + } + .get()) + { + rename_me(format_options, "altså, fo förmå, på en måte", "fo", expected_maybe_text); + } } #[test] fn partial_match_end() { - rename_me( - Some(FormatOptions { highlight: true, crop: None }), - "förmå, på en måte", - "fo", - "förmå, på en måte", - ); + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("förmå, på…"), + highlight: Some("förmå, på en måte"), + crop: Some("förmå, på…"), + } + .get()) + { + rename_me(format_options, "förmå, på en måte", "fo", expected_maybe_text); + } - rename_me( - Some(FormatOptions { highlight: true, crop: None }), - "fo förmå, på en måte", - "fo", - "fo rmå, på en måte", - ); + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("fo rmå…"), + highlight: Some("fo rmå, på en måte"), + crop: Some("fo förmå…"), + } + .get()) + { + rename_me(format_options, "fo förmå, på en måte", "fo", expected_maybe_text); + } } #[test] fn partial_match_beginning() { - rename_me( - Some(FormatOptions { highlight: true, crop: None }), - "altså, förmå", - "fo", - "altså, förmå", - ); + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("altså, förmå"), + highlight: Some("altså, förmå"), + crop: Some("altså, förmå"), + } + .get()) + { + rename_me(format_options, "altså, förmå", "fo", expected_maybe_text); + } - rename_me( - Some(FormatOptions { highlight: true, crop: None }), - "altså, fo förmå", - "fo", - "altså, fo rmå", + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("…fo rmå"), + highlight: Some("altså, fo rmå"), + crop: Some("…fo förmå"), + } + .get()) + { + rename_me(format_options, "altså, fo förmå", "fo", expected_maybe_text); + } + } + + #[test] + fn separator_at_end() { + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("…minst förmå. , ;"), + highlight: Some("; , — dei liger under som minst förmå. , ;"), + crop: Some("…minst förmå. , ;"), + } + .get()) + { + rename_me( + format_options, + "; , — dei liger under som minst förmå. , ;", + "minst", + expected_maybe_text, + ); + } + } + + #[test] + fn separator_at_beginning() { + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("; , — dei liger…"), + highlight: Some("; , — dei liger under som minst förmå. , ;"), + crop: Some("; , — dei liger…"), + } + .get()) + { + rename_me( + format_options, + "; , — dei liger under som minst förmå. , ;", + "dei", + expected_maybe_text, + ); + } + } + + #[test] + fn phrase() { + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("…dei liger…"), + highlight: Some( + "Dei store fiskane eta dei små — dei liger under som minst förmå.", + ), + crop: Some("…dei liger…"), + } + .get()) + { + rename_me_with_base_text(format_options, "\"dei liger\"", expected_maybe_text); + } + } + + #[test] + fn phrase_highlight_bigger_than_crop() { + rename_me_with_base_text( + Some(FormatOptions { highlight: true, crop: Some(1) }), + "\"dei liger\"", + Some("…dei…"), ); } - // #[test] - // fn format_identity() { - // let temp_index = temp_index_with_documents(None); - // let rtxn = temp_index.read_txn().unwrap(); - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world"); - // let format_options = Some(FormatOptions { highlight: false, crop: None }); + #[test] + fn phrase_bigger_than_crop() { + rename_me_with_base_text( + Some(FormatOptions { highlight: false, crop: Some(1) }), + "\"dei liger\"", + Some("…dei…"), + ); + } - // let test_values = [ - // // Text without any match. - // "A quick brown fox can not jump 32 feet, right? Brr, it is cold!", - // // Text containing all matches. - // "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.", - // // Text containing some matches. - // "Natalie risk her future to build a world with the boy she loves." - // ]; + #[test] + fn phrase_highlight_crop_middle() { + rename_me_with_base_text( + Some(FormatOptions { highlight: true, crop: Some(4) }), + "\"dei liger\"", + Some("…små — dei liger under…"), + ); + } - // for text in test_values { - // let mut matcher = builder.build(text, None); - // // no crop and no highlight should return complete text. - // assert_eq!(matcher.get_formatted_text(format_options), None); - // } - // } + #[test] + fn phrase_crop_middle() { + rename_me_with_base_text( + Some(FormatOptions { highlight: false, crop: Some(4) }), + "\"dei liger\"", + Some("…små — dei liger under…"), + ); + } - // #[test] - // fn format_highlight() { - // let temp_index = temp_index_with_documents(None); - // let rtxn = temp_index.read_txn().unwrap(); - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world"); - // let format_options = Some(FormatOptions { highlight: true, crop: None }); + #[test] + fn phrase_highlight_crop_end() { + rename_me_with_base_text( + Some(FormatOptions { highlight: true, crop: Some(4) }), + "\"minst förmå\"", + Some("…under som minst förmå."), + ); + } - // let test_values = [ - // // empty text. - // ["", ""], - // // text containing only separators. - // [":-)", ":-)"], - // // Text without any match. - // ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!", - // "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"], - // // Text containing all matches. - // ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.", - // "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."], - // // Text containing some matches. - // ["Natalie risk her future to build a world with the boy she loves.", - // "Natalie risk her future to build a world with the boy she loves."], - // ]; + #[test] + fn phrase_crop_end() { + rename_me_with_base_text( + Some(FormatOptions { highlight: false, crop: Some(4) }), + "\"minst förmå\"", + Some("…under som minst förmå."), + ); + } - // for [text, expected_text] in test_values { - // let mut matcher = builder.build(text, None); - // // no crop should return complete text with highlighted matches. - // assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string())); - // } - // } - - // #[test] - // fn highlight_unicode() { - // let temp_index = temp_index_with_documents(None); - // let rtxn = temp_index.read_txn().unwrap(); - // let format_options = Some(FormatOptions { highlight: true, crop: None }); - - // let test_values = [ - // // Text containing prefix match. - // ["world", "Ŵôřlḑôle", "Ŵôřlḑôle"], - // // Text containing unicode match. - // ["world", "Ŵôřlḑ", "Ŵôřlḑ"], - // // Text containing unicode match. - // ["westfali", "Westfália", "Westfália"], - // ]; - - // for [query, text, expected_text] in test_values { - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query); - // let mut matcher = builder.build(text, None); - // // no crop should return complete text with highlighted matches. - // assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string())); - // } - // } - - // #[test] - // fn format_crop() { - // let temp_index = temp_index_with_documents(None); - // let rtxn = temp_index.read_txn().unwrap(); - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world"); - // let format_options = Some(FormatOptions { highlight: false, crop: Some(10) }); - - // let test_values = [ - // // empty text. - // // ["", ""], - // // text containing only separators. - // // [":-)", ":-)"], - // // Text without any match. - // ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!", - // "A quick brown fox can not jump 32 feet, right…"], - // // Text without any match starting by a separator. - // ["(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)", - // "(A quick brown fox can not jump 32 feet, right…" ], - // // Test phrase propagation - // ["Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.", - // "…Split The World is a book written by Emily Henry…"], - // // Text containing some matches. - // ["Natalie risk her future to build a world with the boy she loves.", - // "…future to build a world with the boy she loves."], - // // Text containing all matches. - // ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.", - // "…she loves. Emily Henry: The Love That Split The World."], - // // Text containing a match unordered and a match ordered. - // ["The world split void void void void void void void void void split the world void void", - // "…void void void void void split the world void void"], - // // Text containing matches with different density. - // ["split void the void void world void void void void void void void void void void split the world void void", - // "…void void void void void split the world void void"], - // ["split split split split split split void void void void void void void void void void split the world void void", - // "…void void void void void split the world void void"] - // ]; - - // for [text, expected_text] in test_values { - // let mut matcher = builder.build(text, None); - // // no crop should return complete text with highlighted matches. - // assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string())); - // } - // } - - // #[test] - // fn format_highlight_crop() { - // let temp_index = temp_index_with_documents(None); - // let rtxn = temp_index.read_txn().unwrap(); - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world"); - // let format_options = Some(FormatOptions { highlight: true, crop: Some(10) }); - - // let test_values = [ - // // empty text. - // ["", ""], - // // text containing only separators. - // [":-)", ":-)"], - // // Text without any match. - // ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!", - // "A quick brown fox can not jump 32 feet, right…"], - // // Text containing some matches. - // ["Natalie risk her future to build a world with the boy she loves.", - // "…future to build a world with the boy she loves."], - // // Text containing all matches. - // ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.", - // "…she loves. Emily Henry: The Love That Split The World."], - // // Text containing a match unordered and a match ordered. - // ["The world split void void void void void void void void void split the world void void", - // "…void void void void void split the world void void"] - // ]; - - // for [text, expected_text] in test_values { - // let mut matcher = builder.build(text, None); - // // no crop should return complete text with highlighted matches. - // assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string())); - // } - // } - - // #[test] - // fn format_highlight_crop_phrase_query() { - // //! testing: https://github.com/meilisearch/meilisearch/issues/3975 - // let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!"; - // let temp_index = temp_index_with_documents(Some(documents!([ - // { "id": 1, "text": text } - // ]))); - // let rtxn = temp_index.read_txn().unwrap(); - - // let format_options = Some(FormatOptions { highlight: true, crop: Some(10) }); - - // let test_values = [ - // // should return 10 words with a marker at the start as well the end, and the highlighted matches. - // ["\"the world\"", - // "…the power to split the world between those who embraced…"], - // // should highlight "those" and the phrase "and those". - // ["those \"and those\"", - // "…world between those who embraced progress and those who resisted…"], - // ["\"The groundbreaking invention had the power to split the world\"", - // "The groundbreaking invention had the power to split the world…"], - // ["\"The groundbreaking invention had the power to split the world between those\"", - // "The groundbreaking invention had the power to split the world…"], - // ["\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"", - // "…between those who embraced progress and those who resisted change!"], - // ["\"groundbreaking invention\" \"split the world between\"", - // "…groundbreaking invention had the power to split the world between…"], - // ["\"groundbreaking invention\" \"had the power to split the world between those\"", - // "…invention had the power to split the world between those…"], - // ]; - - // for [query, expected_text] in test_values { - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query); - // let mut matcher = builder.build(text, None); - - // assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string())); - // } - // } - - // #[test] - // fn smaller_crop_size() { - // //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295 - // let temp_index = temp_index_with_documents(None); - // let rtxn = temp_index.read_txn().unwrap(); - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world"); - // let text = "void void split the world void void."; - // let mut matcher = builder.build(text, None); - - // let test_values = [ - // // set a smaller crop size - // // because crop size < query size, partially format matches. - // (2, "…split the…"), - // // set a smaller crop size - // // because crop size < query size, partially format matches. - // (1, "…split…"), - // // set crop size to 0 - // // because crop size is 0, crop is ignored. - // (0, "void void split the world void void."), - // ]; - - // for (crop_size, expected_text) in test_values { - // // set a smaller crop size - // let format_options = Some(FormatOptions { highlight: false, crop: Some(crop_size) }); - // assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string())); - // } - // } - - // #[test] - // fn partial_matches() { - // let temp_index = temp_index_with_documents(None); - // let rtxn = temp_index.read_txn().unwrap(); - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "the \"t he\" door \"do or\""); - - // let format_options = Some(FormatOptions { highlight: true, crop: None }); - - // let text = "the do or die can't be he do and or isn't he"; - // let mut matcher = builder.build(text, None); - // assert_eq!( - // matcher.get_formatted_text(format_options), - // Some( - // "the do or die can't be he do and or isn't he" - // .to_string() - // ) - // ); - // } + #[test] + fn phrase_highlight_crop_beginning() { + rename_me_with_base_text( + Some(FormatOptions { highlight: true, crop: Some(4) }), + "\"Dei store\"", + Some("Dei store fiskane eta…"), + ); + } }