From 409bc6b4247d27a6fdc4a9ff6aede3b5684a8ca3 Mon Sep 17 00:00:00 2001 From: "F. Levi" <55688616+flevi29@users.noreply.github.com> Date: Fri, 20 Jun 2025 09:46:39 +0300 Subject: [PATCH] Added more tests, fixed issue --- crates/milli/.tmp4e121b/data.mdb | Bin 69632 -> 0 bytes crates/milli/.tmp4e121b/lock.mdb | Bin 8128 -> 0 bytes crates/milli/.tmpNxMsye/data.mdb | Bin 69632 -> 0 bytes crates/milli/.tmpNxMsye/lock.mdb | Bin 8128 -> 0 bytes .../src/search/new/matches/adjust_indices.rs | 43 +- .../src/search/new/matches/matching_words.rs | 204 ++++--- crates/milli/src/search/new/matches/mod.rs | 544 ++++++++---------- 7 files changed, 368 insertions(+), 423 deletions(-) delete mode 100644 crates/milli/.tmp4e121b/data.mdb delete mode 100644 crates/milli/.tmp4e121b/lock.mdb delete mode 100644 crates/milli/.tmpNxMsye/data.mdb delete mode 100644 crates/milli/.tmpNxMsye/lock.mdb diff --git a/crates/milli/.tmp4e121b/data.mdb b/crates/milli/.tmp4e121b/data.mdb deleted file mode 100644 index f6705d4f179f23fcb1b8d7064e7e1f3d8fedf335..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 69632 zcmeI5UuYc19mnTRl6|^AI~P}^sx8cMloX8CoK)GBgda?BX`s|C)Pyzg2=U{qtHui_~$AQgy~ktaqh%MwfMZ zU#_f{vP?WT8j@^WRpa-{ON(bq-~)ZXL&Ej>Bw-8$W~;d%nsUJ&++4^zhaDimV6j#sIq0J9S5Olg<-Hds1nj~ zNb7LCbB=90q1m=#XCVkzP1hb&Wn$L^>$uLMEfkpI7R7hRYCEwR#i8pj41BX>=U48n zvvYiAV1`yTJ9{LqzJCvF-aU0L1(6%OfxkOD5B7d1%HVd;_2V6?q@FWIcfQM8>6at&*;TNsCx+ zD4Hc$aXojJzuX%(nDy`rm6Y>A8dQHB+ApXAo| z|3A(y@C*DcH3tCm|F>^L009sH0T9@G0`UL)VV?g_6aGK^|LuECzR$q_hyTBI zpUL+Z`2Snel2^0D4y){C)(#!9X3RFNxH;XLK4!L#n^QAqT66QQnfa-yqu-dFo12?C z_Doaf?=IPW<=-m)ulL=j`Tw{0EzJMlzVVO)1V8`;KmY_l00ck)1V8`;KmY_D5(2cp zen~|2>7$Q2{;$*fQ+wt5|110o{Qr~)PCx(zKmY_l00ck)1V8`;KmY_lU~dVWR@LdX z`h_W0_*V0j<2E9(Ypv7hxY2pbcN$J?HEb#M-u-3c;}>65iA8t82^(EsY)RXQ0H%@Wh|`S)i2KaC<3*pxDIG&Qxzo?xf=I{yVf z!?}wAN<+>G~xf_{eOe^8TkM3 z|F`Zn`De_d=KqsNJ;Wlx?AA*1%Kgvig=^(*HloPx9ORDE|$AU3mT9 zJvNY$?W6I7oJ5UOC`l!4wjw*i z%KW$dC9wy<4gMM`0Z->yp|<#`HZKX@zT|Xdpc+0&nxGfWQmuq0BVUEWl$UT zX^EN%pb{k6IPQ>WQJebfGAh!qruma39h*?0e|k57thUI;*hl;$G2{P>+={Rr1V8`; zKmY_l00ck)1V8`;KmY_DMFQmW%RvM+ERoLtX9e>7Yblf>dx~AERr!C6N#jXloge3S z#mxW9hHqTtS88t?f2lS38~kJAQtcgnjz4Xj=6^Cit&JJ$wRdW-7^xbZfB*=900@8p z2!H?xfB*=900{I&fc&*GE6Ol*x@=oVZi_MioGb^|Ifp3hyh$p`rwh(?UR8dBw91C` zGW28tJLMK~sy|(juC)|oUFjlpl9pIOhO2W{R9TVg+MvN;1uCuU6=c#nsYJS?H@oslg% zoxb##G*sEL(-!}nnN}DEtNogt-}CB_*5P>P99z6>$!uG(vk-)$CMfZ8=>_LFC46;M3z_TU|QVt96pDgJjo|<%Z@`7_7LS z8?W}MaU$tTS~F>U)b{@i+Fj4_<48QHw`~L6()zu7M65z>pq}#Es?g1sZrx8hhM8x% zJ~fE0Wpm%u{YX#p*GD=3|EuhB?E?RI?Q(6xc-DBzc#GfR!~A-0h+z8%2XcS_2!H?x zfB*=900@8p2!H?x?2y0-RgN>^8j_QFHj1q{dZFoe#Xw{D-Oe|CF&yYb&H1UxW*o%U z;T2H5RT<3)ssolo9%D^5|2Qm61hb+Xf znJY&g&voU9!wXHJ5j8b&eN@I{3Zm9ojGrEMo#l+qTyZz>#tmA^`HEn)nB^#hrhl*TEFzi!bsrO zi>&_g^$%{a`j6kgDZVcquD|?VVdRN8@ajY3^`G4=jEo&@OdVt0$Xo0<_Ef#3efJ+G z2L1f>Yj3d6>HUARGhc5Vo11y2N#jua#Rzp6iMA6}%Z4V;KWS=_mh1oDQBhjhK1rtYZw?CPx9-z LDEg?&>Gb~q$g8fD diff --git a/crates/milli/.tmp4e121b/lock.mdb b/crates/milli/.tmp4e121b/lock.mdb deleted file mode 100644 index b4ab0527083cbd1fe238516d912af88bb8b62101..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8128 zcmeIvu?Ye(7>41Xjflm8jfI7UIDsQ*BZ>&Rha=n$f;P?~S2%|&xPYLD5&q^aH0_%v z9N+hoI$6yf8uNGLtAkObMSi>OBKe|}MAkLTGzx4nAb3Q)IKmi35P(T3% h6i`3`1r$&~0R4FdiWt2mloQp4>h6ni&@|9satDz zkKH}zaQSAKKkw7ujiCV9ycgeVXVOVSNdmk zS*7=9?p0Ejx|1@@#QFBcM|EF5b3YOw00JNY0w4eaAOHd&00JNY0w4eakAgtoL*JeM zul#l~OpyBPYww@YW##{Cac*_rSn6(^f&d7B00@8p2!H?xfB*=900@8p2s|1B%5P^Z z&z@!13fBsEwH0li-!2}RJXw6Dc(L?h=~(HLiPtBl%J1_}3#TStP4k;a$bKWvd%hy{*Xsvqwx^A1Js*LSA%-XiKW(ozmsG|668!ao;gV49#)sZVp zc7EZ(I@`w=M^+eHIm5^-VV@o7meUE>$=s0@l7*ko=ySVkYdcgI+xK)|pVhO2Fxa6w zvhH(zZPX5WI|z5!zO}wC*Yh1?&Aw!rar5r3l~Z+&t&|;Thb@w-v*;%~#22^5A@;oI zn|jB$+ICOR_V1C+d!Wva7uccgxx2ITVE=m}4Q_c|H{790>N(R?^IcXsqidH{C0z&O z7BOsJG>g}>9ebDG;2CE1j8>?pTY5MBZz|JgcBAR3v}HR}lsr|eUTFEQA)ik4??*hS zNAAMZ1|_QDt}NQe9K&|$QPhvN_s_CdRGeKMqyMB+><+)fhkckK9|(W|2!H?xfB*=9 z00@8p2!H?xfWYHKfP(Y~#0a$?h=DO-S~My^j=oxdkNE%N)MBs&1V8`;KmY_l;9&$P zc7pi-!}yR~O?Dt!i2o!0zjc3!u4elT;{T?O`2SYDCfi>S|KExgUCR>NyZFC~_m9UW zs$=B;Kg}-l%ls`l2LSW`w{Jt_009sH0T2KI5ZHSH@c)Nlp8cf>{~!MU_Pr+CXW;+C z|KGaLWcv&J|E+07SJT9f$m~Vd@-4Au%+!srHea7#((A|d#^Ra!a&zfKb75ibL}O|G zON)zVYbt-YV-A&ntN6d#cc13}-{QA0|9|_&Lk&Sesd@GcJI#N^ zf5y*nlYhiN;5~kt|AYUA|CrfB*=900@8p2>f3O!2cfxfBKgu{C~XvZ}dI` z{~!MU*1abCjCs=hfAXluSl}2ElS+$td@6lFyirod=~J?CCd2WO#G|{$nGDsZ@{BPN z(@&+9=dSY0IRL5a<4lJ9Gm>`ndeM|b*y=elU6h~EN96y{34i}p{(XLfU*X^7r^H-< z=lE?thcy8SC7gf&2!H?xfB*=900@8p2!H?xJP8EI=PoE8J$j8EQ?l5TPs#R^mz|On zly9Dr?PqeFAS00;e@aqdu?L@$3GIJ!zTt2U4_xsr?k6doJr*g*LymY=v7LOIk>G&#FJfa{&^eUGe zE0CM`DTy2_kO`9XefvxfxzT?{A{&d|4G^8?*);oD#Q#5rr~g^*#;_d(KmY_l00ck) z1V8`;KmY_l;E5za9={kc$dRX?q-o*+D<@R)?5k|Ee3Uo2tDV*E@)iCsZIi#PUDkfV zKPi8peOj)FnE|ufX8Di&51h?kI)bdu%i{sG$xEEk2W2g=Ju>15$LY0y}XB&t-wOX(gX-N`a` zWl1iwVW31|Nu>QgEd@3vOQnVHN^_3x9?&c(MOF{gTUtHPCZc{)D=n{2v{J4CqOk=s zgsyk&})E$J;Iv{pTTUAN6qRWh>cu=o*ct(iiB zZh9^8**02Ms0X2MyQ{&dN=Ya7|HuCS+xMJo-^Tub58D4vJxt;y*eR&$-Rf#?HGT7@ z+4G(+zQdWXbo?!TxGuFmTkX@9(be$+JG4EQ9uM2mA?g*^V8q z52^8RTx+G7G(Ktj|9P#hW4U1<9@N{mfof^>-aR5#qBf9E`E6CG=1aEjCms8kW7sY= zh$`j&zNz|=n&hvJ(ft3T>`M79epI_s{<`+6*3vEVslt!t5JvA(>#!PNWax*b!DQJT6D*5>Q;OM3md-dH?SUv4g)Xf70VBj&4(UoyyIuP@SKW2QgX{4RMHJHai-rJl~B*C|;=9CYfmU zLUFP|V8bmR=V{}Ai4aW-3iDa!wn(Cr%v8Gv}IyfhD zL|5yzW>nT}tszcio{ia>EAoolYt8u$_C;C7l=Qd*G2&sh?Vc6r?I4uP>ZvM&pM-$1 zGZ%Eu$jP3FH9Y9X%8%Yg9c_s@S|l^@)=DLyxk zR9=2RH~viMIhC=Q%1>_Q#-|Tf8%wMkIBRXoY*Y%$*Z)pz&`(!h|9R4W!@e(&#-R>~ z5o$jWZ6Q=e4M(1T+;k!xt^a?M-x53j#bx0D0w4eaAOHd&00JNY0w4eaAOHd&u*U?d kvOKYZATC!O<+gr8CO0e;M_Pfv*i4*X%|%g1b)QQA7g+hN<^TWy diff --git a/crates/milli/.tmpNxMsye/lock.mdb b/crates/milli/.tmpNxMsye/lock.mdb deleted file mode 100644 index abe89541a2a8b5633c06e4203f2fe5fac9ab0262..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8128 zcmeH@AqoOf7zP(DCPBTzA|_EViNR [usize; 2] { match words_count.cmp(&crop_size) { - Ordering::Less => get_adjusted_indices_for_too_few_words( + Ordering::Equal | Ordering::Less => get_adjusted_indices_for_too_few_words( tokens, index_backward, index_forward, words_count, crop_size, ), - Ordering::Equal => [index_backward, index_forward], Ordering::Greater => [ index_backward, get_adjusted_index_forward_for_too_many_words( diff --git a/crates/milli/src/search/new/matches/matching_words.rs b/crates/milli/src/search/new/matches/matching_words.rs index ab7f90f05..3edc3eb38 100644 --- a/crates/milli/src/search/new/matches/matching_words.rs +++ b/crates/milli/src/search/new/matches/matching_words.rs @@ -247,12 +247,22 @@ impl MatchingWords { // TODO: There is potentially an optimization to be made here // if we matched a term then we can skip checking it for further iterations? + println!( + "{:?}", + self.located_matching_words + .iter() + .flat_map(|lw| lw.value.iter().map(move |w| ( + lw.is_prefix, + lw.original_char_count, + self.word_interner.get(*w) + ))) + .collect::>() + ); + self.located_matching_words .iter() - .flat_map(|lw| lw.value.iter().map(move |w| (lw, w))) + .flat_map(|lw| lw.value.iter().map(move |w| (lw, self.word_interner.get(*w)))) .find_map(|(located_words, word)| { - let word = self.word_interner.get(*word); - let [char_count, byte_len] = match PrefixedOrEquality::new(tph.token.lemma(), word, located_words.is_prefix) { @@ -368,93 +378,105 @@ impl Debug for MatchingWords { } } -// #[cfg(test)] -// pub(crate) mod tests { -// use super::super::super::located_query_terms_from_tokens; -// use super::*; -// use crate::search::new::matches::tests::temp_index_with_documents; -// use crate::search::new::query_term::ExtractedTokens; -// use charabia::{TokenKind, TokenizerBuilder}; -// use std::borrow::Cow; +#[cfg(test)] +mod tests { + use super::super::super::located_query_terms_from_tokens; + use super::*; + use crate::index::tests::TempIndex; + use crate::search::new::query_term::ExtractedTokens; + use charabia::{TokenKind, TokenizerBuilder}; + use std::borrow::Cow; -// #[test] -// fn matching_words() { -// let temp_index = temp_index_with_documents(None); -// let rtxn = temp_index.read_txn().unwrap(); -// let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap(); -// let mut builder = TokenizerBuilder::default(); -// let tokenizer = builder.build(); -// let text = "split this world"; -// let tokens = tokenizer.tokenize(text); -// let ExtractedTokens { query_terms, .. } = -// located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap(); -// let matching_words = MatchingWords::new(ctx, &query_terms); + fn temp_index_with_documents() -> TempIndex { + let temp_index = TempIndex::new(); + temp_index + .add_documents(documents!([ + { "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" }, + { "id": 2, "name": "Westfália" }, + { "id": 3, "name": "Ŵôřlḑôle" }, + ])) + .unwrap(); + temp_index + } -// assert_eq!( -// matching_words.get_matches_and_query_positions( -// &[ -// Token { -// kind: TokenKind::Word, -// lemma: Cow::Borrowed("split"), -// char_end: "split".chars().count(), -// byte_end: "split".len(), -// ..Default::default() -// }, -// Token { -// kind: TokenKind::Word, -// lemma: Cow::Borrowed("nyc"), -// char_end: "nyc".chars().count(), -// byte_end: "nyc".len(), -// ..Default::default() -// }, -// Token { -// kind: TokenKind::Word, -// lemma: Cow::Borrowed("world"), -// char_end: "world".chars().count(), -// byte_end: "world".len(), -// ..Default::default() -// }, -// Token { -// kind: TokenKind::Word, -// lemma: Cow::Borrowed("worlded"), -// char_end: "worlded".chars().count(), -// byte_end: "worlded".len(), -// ..Default::default() -// }, -// Token { -// kind: TokenKind::Word, -// lemma: Cow::Borrowed("thisnew"), -// char_end: "thisnew".chars().count(), -// byte_end: "thisnew".len(), -// ..Default::default() -// } -// ], -// text -// ), -// ( -// vec![ -// Match { -// char_count: 5, -// byte_len: 5, -// position: MatchPosition::Word { word_position: 0, token_position: 0 } -// }, -// Match { -// char_count: 5, -// byte_len: 5, -// position: MatchPosition::Word { word_position: 2, token_position: 2 } -// }, -// Match { -// char_count: 5, -// byte_len: 5, -// position: MatchPosition::Word { word_position: 3, token_position: 3 } -// } -// ], -// vec![ -// QueryPosition { range: [0, 0], index: 0 }, -// QueryPosition { range: [2, 2], index: 1 }, -// QueryPosition { range: [2, 2], index: 2 } -// ] -// ) -// ); -// } -// } + #[test] + fn matching_words() { + let temp_index = temp_index_with_documents(); + let rtxn = temp_index.read_txn().unwrap(); + let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap(); + let mut builder = TokenizerBuilder::default(); + let tokenizer = builder.build(); + let text = "split this world"; + let tokens = tokenizer.tokenize(text); + let ExtractedTokens { query_terms, .. } = + located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap(); + let matching_words = MatchingWords::new(ctx, &query_terms); + + assert_eq!( + matching_words.get_matches_and_query_positions( + &[ + Token { + kind: TokenKind::Word, + lemma: Cow::Borrowed("split"), + char_end: "split".chars().count(), + byte_end: "split".len(), + ..Default::default() + }, + Token { + kind: TokenKind::Word, + lemma: Cow::Borrowed("nyc"), + char_end: "nyc".chars().count(), + byte_end: "nyc".len(), + ..Default::default() + }, + Token { + kind: TokenKind::Word, + lemma: Cow::Borrowed("world"), + char_end: "world".chars().count(), + byte_end: "world".len(), + ..Default::default() + }, + Token { + kind: TokenKind::Word, + lemma: Cow::Borrowed("worlded"), + char_end: "worlded".chars().count(), + byte_end: "worlded".len(), + ..Default::default() + }, + Token { + kind: TokenKind::Word, + lemma: Cow::Borrowed("thisnew"), + char_end: "thisnew".chars().count(), + byte_end: "thisnew".len(), + ..Default::default() + } + ], + text + ), + ( + vec![ + Match { + char_count: 5, + byte_len: 5, + position: MatchPosition::Word { word_position: 0, token_position: 0 } + }, + Match { + char_count: 5, + byte_len: 5, + position: MatchPosition::Word { word_position: 2, token_position: 2 } + }, + Match { + char_count: 5, + byte_len: 5, + position: MatchPosition::Word { word_position: 3, token_position: 3 } + } + ], + vec![ + QueryPosition { range: [0, 0], index: 0 }, + QueryPosition { range: [2, 2], index: 1 }, + QueryPosition { range: [2, 2], index: 2 } + ] + ) + ); + } +} diff --git a/crates/milli/src/search/new/matches/mod.rs b/crates/milli/src/search/new/matches/mod.rs index bab82da8c..f47582af7 100644 --- a/crates/milli/src/search/new/matches/mod.rs +++ b/crates/milli/src/search/new/matches/mod.rs @@ -200,7 +200,7 @@ mod tests { format_options: Option, text: &str, query: &str, - expected_text: &str, + expected_maybe_text: Option<&str>, ) { let temp_index = TempIndex::new(); @@ -216,7 +216,28 @@ mod tests { let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query); let mut matcher = builder.build(text, None); - assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string())); + assert_eq!( + matcher.get_formatted_text(format_options), + expected_maybe_text.map(|v| v.to_string()) + ); + } + + struct FormatVariations<'a> { + highlight_with_crop: Option<&'a str>, + highlight: Option<&'a str>, + crop: Option<&'a str>, + } + + impl<'a> FormatVariations<'a> { + fn get(&self) -> [(Option, Option<&'a str>); 5] { + [ + (None, None), + (Some(FormatOptions { highlight: true, crop: Some(2) }), self.highlight_with_crop), + (Some(FormatOptions { highlight: true, crop: None }), self.highlight), + (Some(FormatOptions { highlight: false, crop: Some(2) }), self.crop), + (Some(FormatOptions { highlight: false, crop: None }), None), + ] + } } /// "Dei store fiskane eta dei små — dei liger under som minst förmå." @@ -225,77 +246,66 @@ mod tests { fn rename_me_with_base_text( format_options: Option, query: &str, - expected_text: &str, + expected_maybe_text: Option<&str>, ) { rename_me( format_options, "Dei store fiskane eta dei små — dei liger under som minst förmå.", query, - expected_text, + expected_maybe_text, ); } #[test] - fn phrase_highlight_bigger_than_crop() { - rename_me_with_base_text( - Some(FormatOptions { highlight: true, crop: Some(1) }), - "\"dei liger\"", - "…dei…", - ); + fn empty_query() { + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("Dei store…"), + highlight: None, + crop: Some("Dei store…"), + } + .get()) + { + rename_me_with_base_text(format_options, "", expected_maybe_text); + } } #[test] - fn phrase_highlight_same_size_as_crop() { - rename_me_with_base_text( - Some(FormatOptions { highlight: true, crop: Some(2) }), - "\"dei liger\"", - "…dei liger…", - ); - } - - #[test] - fn phrase_highlight_crop_middle() { - rename_me_with_base_text( - Some(FormatOptions { highlight: true, crop: Some(4) }), - "\"dei liger\"", - "…små — dei liger under…", - ); - } - - #[test] - fn phrase_highlight_crop_end() { - rename_me_with_base_text( - Some(FormatOptions { highlight: true, crop: Some(4) }), - "\"minst förmå\"", - "…under som minst förmå.", - ); - } - - #[test] - fn phrase_highlight_crop_beginning() { - rename_me_with_base_text( - Some(FormatOptions { highlight: true, crop: Some(4) }), - "\"Dei store\"", - "Dei store fiskane eta…", - ); + fn only_separators() { + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some(":-…"), + highlight: None, + crop: Some(":-…"), + } + .get()) + { + rename_me(format_options, ":-)", ":-)", expected_maybe_text); + } } #[test] fn highlight_end() { - rename_me_with_base_text( - Some(FormatOptions { highlight: true, crop: None }), - "minst förmå", - "Dei store fiskane eta dei små — dei liger under som minst förmå.", - ); + // TODO: Why is "förmå" marked as prefix in located matching words? + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("…minst förmå."), + highlight: Some("Dei store fiskane eta dei små — dei liger under som minst förmå."), + crop: Some("…minst förmå."), + } + .get()) { + rename_me_with_base_text(format_options, "minst förmå", expected_maybe_text); + } } #[test] fn highlight_beginning_and_middle() { - rename_me_with_base_text( - Some(FormatOptions { highlight: true, crop: None }), - "Dei store", - "Dei store fiskane eta dei små — dei liger under som minst förmå.", - ); + // TODO: Why is "store" marked as prefix in located matching words? + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("Dei store…"), + highlight: Some("Dei store fiskane eta dei små — dei liger under som minst förmå."), + crop: Some("Dei store…"), + } + .get()) { + rename_me_with_base_text(format_options, "Dei store", expected_maybe_text); + } } #[test] @@ -306,291 +316,185 @@ mod tests { // `milli::search::new::query_term::QueryTerm::all_computed_derivations` might be at fault here // interned words = ["forma"] - rename_me( - Some(FormatOptions { highlight: true, crop: None }), - "altså, förmå, på en måte", - "fo", - "altså, förmå, på en måte", - ); + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("…förmå, på…"), + highlight: Some("altså, förmå, på en måte"), + crop: Some("…förmå, på…"), + } + .get()) + { + rename_me(format_options, "altså, förmå, på en måte", "fo", expected_maybe_text); + } // interned words = ["fo", "forma"] - rename_me( - Some(FormatOptions { highlight: true, crop: None }), - "altså, fo förmå, på en måte", - "fo", - "altså, fo rmå, på en måte", - ); + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("…fo rmå…"), + highlight: Some("altså, fo rmå, på en måte"), + crop: Some("…fo förmå…"), + } + .get()) + { + rename_me(format_options, "altså, fo förmå, på en måte", "fo", expected_maybe_text); + } } #[test] fn partial_match_end() { - rename_me( - Some(FormatOptions { highlight: true, crop: None }), - "förmå, på en måte", - "fo", - "förmå, på en måte", - ); + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("förmå, på…"), + highlight: Some("förmå, på en måte"), + crop: Some("förmå, på…"), + } + .get()) + { + rename_me(format_options, "förmå, på en måte", "fo", expected_maybe_text); + } - rename_me( - Some(FormatOptions { highlight: true, crop: None }), - "fo förmå, på en måte", - "fo", - "fo rmå, på en måte", - ); + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("fo rmå…"), + highlight: Some("fo rmå, på en måte"), + crop: Some("fo förmå…"), + } + .get()) + { + rename_me(format_options, "fo förmå, på en måte", "fo", expected_maybe_text); + } } #[test] fn partial_match_beginning() { - rename_me( - Some(FormatOptions { highlight: true, crop: None }), - "altså, förmå", - "fo", - "altså, förmå", - ); + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("altså, förmå"), + highlight: Some("altså, förmå"), + crop: Some("altså, förmå"), + } + .get()) + { + rename_me(format_options, "altså, förmå", "fo", expected_maybe_text); + } - rename_me( - Some(FormatOptions { highlight: true, crop: None }), - "altså, fo förmå", - "fo", - "altså, fo rmå", + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("…fo rmå"), + highlight: Some("altså, fo rmå"), + crop: Some("…fo förmå"), + } + .get()) + { + rename_me(format_options, "altså, fo förmå", "fo", expected_maybe_text); + } + } + + #[test] + fn separator_at_end() { + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("…minst förmå. , ;"), + highlight: Some("; , — dei liger under som minst förmå. , ;"), + crop: Some("…minst förmå. , ;"), + } + .get()) + { + rename_me( + format_options, + "; , — dei liger under som minst förmå. , ;", + "minst", + expected_maybe_text, + ); + } + } + + #[test] + fn separator_at_beginning() { + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("; , — dei liger…"), + highlight: Some("; , — dei liger under som minst förmå. , ;"), + crop: Some("; , — dei liger…"), + } + .get()) + { + rename_me( + format_options, + "; , — dei liger under som minst förmå. , ;", + "dei", + expected_maybe_text, + ); + } + } + + #[test] + fn phrase() { + for (format_options, expected_maybe_text) in (FormatVariations { + highlight_with_crop: Some("…dei liger…"), + highlight: Some( + "Dei store fiskane eta dei små — dei liger under som minst förmå.", + ), + crop: Some("…dei liger…"), + } + .get()) + { + rename_me_with_base_text(format_options, "\"dei liger\"", expected_maybe_text); + } + } + + #[test] + fn phrase_highlight_bigger_than_crop() { + rename_me_with_base_text( + Some(FormatOptions { highlight: true, crop: Some(1) }), + "\"dei liger\"", + Some("…dei…"), ); } - // #[test] - // fn format_identity() { - // let temp_index = temp_index_with_documents(None); - // let rtxn = temp_index.read_txn().unwrap(); - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world"); - // let format_options = Some(FormatOptions { highlight: false, crop: None }); + #[test] + fn phrase_bigger_than_crop() { + rename_me_with_base_text( + Some(FormatOptions { highlight: false, crop: Some(1) }), + "\"dei liger\"", + Some("…dei…"), + ); + } - // let test_values = [ - // // Text without any match. - // "A quick brown fox can not jump 32 feet, right? Brr, it is cold!", - // // Text containing all matches. - // "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.", - // // Text containing some matches. - // "Natalie risk her future to build a world with the boy she loves." - // ]; + #[test] + fn phrase_highlight_crop_middle() { + rename_me_with_base_text( + Some(FormatOptions { highlight: true, crop: Some(4) }), + "\"dei liger\"", + Some("…små — dei liger under…"), + ); + } - // for text in test_values { - // let mut matcher = builder.build(text, None); - // // no crop and no highlight should return complete text. - // assert_eq!(matcher.get_formatted_text(format_options), None); - // } - // } + #[test] + fn phrase_crop_middle() { + rename_me_with_base_text( + Some(FormatOptions { highlight: false, crop: Some(4) }), + "\"dei liger\"", + Some("…små — dei liger under…"), + ); + } - // #[test] - // fn format_highlight() { - // let temp_index = temp_index_with_documents(None); - // let rtxn = temp_index.read_txn().unwrap(); - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world"); - // let format_options = Some(FormatOptions { highlight: true, crop: None }); + #[test] + fn phrase_highlight_crop_end() { + rename_me_with_base_text( + Some(FormatOptions { highlight: true, crop: Some(4) }), + "\"minst förmå\"", + Some("…under som minst förmå."), + ); + } - // let test_values = [ - // // empty text. - // ["", ""], - // // text containing only separators. - // [":-)", ":-)"], - // // Text without any match. - // ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!", - // "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"], - // // Text containing all matches. - // ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.", - // "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."], - // // Text containing some matches. - // ["Natalie risk her future to build a world with the boy she loves.", - // "Natalie risk her future to build a world with the boy she loves."], - // ]; + #[test] + fn phrase_crop_end() { + rename_me_with_base_text( + Some(FormatOptions { highlight: false, crop: Some(4) }), + "\"minst förmå\"", + Some("…under som minst förmå."), + ); + } - // for [text, expected_text] in test_values { - // let mut matcher = builder.build(text, None); - // // no crop should return complete text with highlighted matches. - // assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string())); - // } - // } - - // #[test] - // fn highlight_unicode() { - // let temp_index = temp_index_with_documents(None); - // let rtxn = temp_index.read_txn().unwrap(); - // let format_options = Some(FormatOptions { highlight: true, crop: None }); - - // let test_values = [ - // // Text containing prefix match. - // ["world", "Ŵôřlḑôle", "Ŵôřlḑôle"], - // // Text containing unicode match. - // ["world", "Ŵôřlḑ", "Ŵôřlḑ"], - // // Text containing unicode match. - // ["westfali", "Westfália", "Westfália"], - // ]; - - // for [query, text, expected_text] in test_values { - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query); - // let mut matcher = builder.build(text, None); - // // no crop should return complete text with highlighted matches. - // assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string())); - // } - // } - - // #[test] - // fn format_crop() { - // let temp_index = temp_index_with_documents(None); - // let rtxn = temp_index.read_txn().unwrap(); - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world"); - // let format_options = Some(FormatOptions { highlight: false, crop: Some(10) }); - - // let test_values = [ - // // empty text. - // // ["", ""], - // // text containing only separators. - // // [":-)", ":-)"], - // // Text without any match. - // ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!", - // "A quick brown fox can not jump 32 feet, right…"], - // // Text without any match starting by a separator. - // ["(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)", - // "(A quick brown fox can not jump 32 feet, right…" ], - // // Test phrase propagation - // ["Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.", - // "…Split The World is a book written by Emily Henry…"], - // // Text containing some matches. - // ["Natalie risk her future to build a world with the boy she loves.", - // "…future to build a world with the boy she loves."], - // // Text containing all matches. - // ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.", - // "…she loves. Emily Henry: The Love That Split The World."], - // // Text containing a match unordered and a match ordered. - // ["The world split void void void void void void void void void split the world void void", - // "…void void void void void split the world void void"], - // // Text containing matches with different density. - // ["split void the void void world void void void void void void void void void void split the world void void", - // "…void void void void void split the world void void"], - // ["split split split split split split void void void void void void void void void void split the world void void", - // "…void void void void void split the world void void"] - // ]; - - // for [text, expected_text] in test_values { - // let mut matcher = builder.build(text, None); - // // no crop should return complete text with highlighted matches. - // assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string())); - // } - // } - - // #[test] - // fn format_highlight_crop() { - // let temp_index = temp_index_with_documents(None); - // let rtxn = temp_index.read_txn().unwrap(); - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world"); - // let format_options = Some(FormatOptions { highlight: true, crop: Some(10) }); - - // let test_values = [ - // // empty text. - // ["", ""], - // // text containing only separators. - // [":-)", ":-)"], - // // Text without any match. - // ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!", - // "A quick brown fox can not jump 32 feet, right…"], - // // Text containing some matches. - // ["Natalie risk her future to build a world with the boy she loves.", - // "…future to build a world with the boy she loves."], - // // Text containing all matches. - // ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.", - // "…she loves. Emily Henry: The Love That Split The World."], - // // Text containing a match unordered and a match ordered. - // ["The world split void void void void void void void void void split the world void void", - // "…void void void void void split the world void void"] - // ]; - - // for [text, expected_text] in test_values { - // let mut matcher = builder.build(text, None); - // // no crop should return complete text with highlighted matches. - // assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string())); - // } - // } - - // #[test] - // fn format_highlight_crop_phrase_query() { - // //! testing: https://github.com/meilisearch/meilisearch/issues/3975 - // let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!"; - // let temp_index = temp_index_with_documents(Some(documents!([ - // { "id": 1, "text": text } - // ]))); - // let rtxn = temp_index.read_txn().unwrap(); - - // let format_options = Some(FormatOptions { highlight: true, crop: Some(10) }); - - // let test_values = [ - // // should return 10 words with a marker at the start as well the end, and the highlighted matches. - // ["\"the world\"", - // "…the power to split the world between those who embraced…"], - // // should highlight "those" and the phrase "and those". - // ["those \"and those\"", - // "…world between those who embraced progress and those who resisted…"], - // ["\"The groundbreaking invention had the power to split the world\"", - // "The groundbreaking invention had the power to split the world…"], - // ["\"The groundbreaking invention had the power to split the world between those\"", - // "The groundbreaking invention had the power to split the world…"], - // ["\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"", - // "…between those who embraced progress and those who resisted change!"], - // ["\"groundbreaking invention\" \"split the world between\"", - // "…groundbreaking invention had the power to split the world between…"], - // ["\"groundbreaking invention\" \"had the power to split the world between those\"", - // "…invention had the power to split the world between those…"], - // ]; - - // for [query, expected_text] in test_values { - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query); - // let mut matcher = builder.build(text, None); - - // assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string())); - // } - // } - - // #[test] - // fn smaller_crop_size() { - // //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295 - // let temp_index = temp_index_with_documents(None); - // let rtxn = temp_index.read_txn().unwrap(); - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world"); - // let text = "void void split the world void void."; - // let mut matcher = builder.build(text, None); - - // let test_values = [ - // // set a smaller crop size - // // because crop size < query size, partially format matches. - // (2, "…split the…"), - // // set a smaller crop size - // // because crop size < query size, partially format matches. - // (1, "…split…"), - // // set crop size to 0 - // // because crop size is 0, crop is ignored. - // (0, "void void split the world void void."), - // ]; - - // for (crop_size, expected_text) in test_values { - // // set a smaller crop size - // let format_options = Some(FormatOptions { highlight: false, crop: Some(crop_size) }); - // assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string())); - // } - // } - - // #[test] - // fn partial_matches() { - // let temp_index = temp_index_with_documents(None); - // let rtxn = temp_index.read_txn().unwrap(); - // let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "the \"t he\" door \"do or\""); - - // let format_options = Some(FormatOptions { highlight: true, crop: None }); - - // let text = "the do or die can't be he do and or isn't he"; - // let mut matcher = builder.build(text, None); - // assert_eq!( - // matcher.get_formatted_text(format_options), - // Some( - // "the do or die can't be he do and or isn't he" - // .to_string() - // ) - // ); - // } + #[test] + fn phrase_highlight_crop_beginning() { + rename_me_with_base_text( + Some(FormatOptions { highlight: true, crop: Some(4) }), + "\"Dei store\"", + Some("Dei store fiskane eta…"), + ); + } }