From 409bc6b4247d27a6fdc4a9ff6aede3b5684a8ca3 Mon Sep 17 00:00:00 2001
From: "F. Levi" <55688616+flevi29@users.noreply.github.com>
Date: Fri, 20 Jun 2025 09:46:39 +0300
Subject: [PATCH] Added more tests, fixed issue

---
 crates/milli/.tmp4e121b/data.mdb              | Bin 69632 -> 0 bytes
 crates/milli/.tmp4e121b/lock.mdb              | Bin 8128 -> 0 bytes
 crates/milli/.tmpNxMsye/data.mdb              | Bin 69632 -> 0 bytes
 crates/milli/.tmpNxMsye/lock.mdb              | Bin 8128 -> 0 bytes
 .../src/search/new/matches/adjust_indices.rs  |  43 +-
 .../src/search/new/matches/matching_words.rs  | 204 ++++---
 crates/milli/src/search/new/matches/mod.rs    | 544 ++++++++----------
 7 files changed, 368 insertions(+), 423 deletions(-)
 delete mode 100644 crates/milli/.tmp4e121b/data.mdb
 delete mode 100644 crates/milli/.tmp4e121b/lock.mdb
 delete mode 100644 crates/milli/.tmpNxMsye/data.mdb
 delete mode 100644 crates/milli/.tmpNxMsye/lock.mdb
diff --git a/crates/milli/.tmp4e121b/data.mdb b/crates/milli/.tmp4e121b/data.mdb
deleted file mode 100644
index f6705d4f179f23fcb1b8d7064e7e1f3d8fedf335..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 69632
zcmeI5UuYc19mnTRl6|^AI~P}^sx8cMloX8CoK)GBgda?BX`s|C)Pyz<G3(nsv9)$r
z?CvR2f*=hp3B?blgg~JL6R62UA4&<K1Y=q!O$~;kx0qr=2&Ofqv?cVR525MI{C4(s
zC0n|a9M!?!d$`^C?aXg}`<c0we)F4|WlVoc>g2=U{qtHui_~$AQgy~ktaqh%MwfMZ
zU#_f{vP?<JFrMTaPL8@<F;^Z55C8!X009sH0T2KI5C8!X009sHfd@gL=b`V;|JQ!I
z7$(SkdfIzubXoiVMv_~X8_JZ%DF}c72!H?xfB*=900@8p2!H?xfWU(xp#65nitHr2
zTDn@gYn(Qw`R(%I>WT8j@^WRpa-{ON(bq-~)ZXL&Ej>B<BDZUw)_z@k-S|=UTJ^zL
z0WBZ^0w4eaAOHd&&>w-8$W~;d%nsUJ&++4^zhaDimV6j#sIq0J9S5Olg<-Hds1nj~
zNb7LCbB=90q1m=#XCVkzP1hb&Wn$L^>$uLMEfkpI7R7hRYCEwR#i8pj41BX>=U48n
zvvYiAV1<d5<BZG__PLSicuVmrnLDsTvhd3}eSUXwafj*>`yTJ<b5`6aj&`Vytou?=
z8?}SkiQ*l$Z?mV%4?@pcbf0(Zq<MGO%9(pktW+Inhb@x1XVFh~h%af4L+rUAw9Tc^
z>9{LqzJCvF-aU0L1(6%OfxkOD5B7d1%HVd;_2V6?q@FWIcfQM8&gt6at&*;TNsCx+
zD4Hc$aXojJzu<A^tXS>X%(nDy`rm<EpV^J3$Fi2~Oi}t&aaLj{^ey#tqCY?4K|OF6
zrZ%Y19q!7aVdhz`PmiKrw7qwhzoO&p`Y8XB9$<I)9p3N54EaC+1V8`;KmY_l00ck)
z1V8`;KmY_DCIS?s-!Dd}!$=H_3De}L06F@4{XOFU4^xZ577zdd5C8!X0D=1vpx6oG
z|M%lVay{9BXd(WO`2W`ZMPAMK8N~l>6Y>A8dQHB+ApXA<E4r2^ws-M=9q%7WOw>o|
z|3A(y@C*DcH3tCm|F>^L<NyH>009sH0T9@G0`UL)VV?g_6aGK^|LuECzR$q_hyTBI
zpUL+Z`2Snel2^0D4y){C)(#!9X3RFNxH;XLK4!L#n^QAqT66QQnfa-yqu-dFo12?C
z_Doaf?=IPW<=-m)ulL=j`Tw{0EzJMlzVVO)1V8`;KmY_l00ck)1V8`;KmY_D5(2cp
zen~|2>7$Q2{;$*fQ+wt5|110o{Qr~)PCx(zKmY_l00ck)1V8`;KmY_lU~dVWR@LdX
z`h_W0_*V0j<2E9(Ypv7hxY2pbcN$J?HEb#M-u-3c;}>65iA8t82^(EsY)RXQ0<Yn@
zeiS!4@4g+1Tt}Pq)<3CU`v0HsPcZ*~Z<`jifdB}A00@8p2n<93^Z)z7m7Qr?WSIYt
z`TyJZntY$Z{C~{<-@4D_`wQm(Z%s>H%@Wh|`S)i2KaC<3*pxDIG&Qxzo?xf=I{yVf
z!<YFr{vrP<KgIvWf5mN%_y47sZ~_7#00JNY0wC~zB>?}wAN<+>G~xf_{eOe^8TkM3
z|F`Zn`De_d=KqsNJ;Wl<QkZmF#N#vR5vGFh8OeU}j!6}{m$HCLjW89U&*U0nBBq~7
zk1!SWmy*wPnT~vYsFe65(cZnjt|kG<>x?AA*1%Kgvig=^(*HloPx9ORDE|$AU3mT9
z<Nx3n`3=5;H32CloPYobfB*=900@8p2!H?xfB*<Q3Ixb|-p7<@FJGdU_|wBlG+n1l
z+KbQRDY3*0&&iT8fsAY)`Q{l(i6#DhMpo{P9b}{>JvNY$?W6I7oJ5UOC`l!4wjw*i
z%KW$dC9wy<4gM<s0r&X?|2r@9KZ|()EsO!Aq;LWPAOHd&00JNY0w4eaAOHd&@I@w|
zeS75_tJmU@B=y_XR(V>M`0Z->yp|<#`HZKX@zT|Xdpc+0&nxGfWQmuq0BVUEWl$UT
zX^EN%pb{k6IPQ>WQJebfGAh!qruma39h*?0e|k57thUI;*hl;$G2{P>+={Rr1V8`;
zKmY_l00ck)1V8`;KmY_DMFQmW%RvM+ERoLtX9e>7Yblf>dx~AERr!C6N#jXloge3S
z#mxW9hHqTtS88t?f2lS38~kJAQtcgnjz4Xj=6^Cit&JJ$wRdW-7^xbZfB*=900@8p
z2!H?xfB*=900{I&fc&*GE6Ol*x@=oVZi_MioGb^|Ifp3hyh$p`rwh(?UR8dBw91C`
zGW28tJLMK~sy|(juC)|oUFjlpl9pIOhO2W{R9TVg+MvN;1uCuU6=c#nsYJS<J*`uQ
zu{Y=u`el8aw3OJ8vXGXzYYj!Zn`Gg8YPHHqi|2Jtb$^@L%lq3vs6e?k5KZb=;d^~&
zvPI=){j_vIO9eHlF-NV;3ZyS*skm2tpBr))Wxfr(s;r=M=d2KNVwmRdIqpSvl3gub
zE!{Ov8`J!D`Ed0_`C@swvR*k-!Tx`J{dkcH1V8`;KmY_l;BzN%pZ)(R&>?H@oslg%
zoxb##G*sEL(-!}nnN}DEtNogt-}CB_*5P>P99z6>$!uG(vk-)<rfUzXl962#;$K^5
z(H06!J7|mVj@5QzGm1mkUx)@(N;<LsKlcCMzUSooHunF!*ZzO{VY2x#qu;Hr=T_4%
zZ;U+`gyJ`x`$;Fi;`{5;>$CMfZ8=>_LFC46;M3z_TU|QVt96pDgJjo|<%Z@`7_7LS
z8?W}MaU$tTS~F>U)b{@i+Fj4_<48QHw`~L6()zu7M65z>pq}#Es?g1sZrx8hhM8x%
zJ~fE0Wpm%u{YX#p*GD=3|EuhB?E?RI?Q(6xc-DBzc#GfR!~A-0h+z8%2XcS_2!H?x
zfB*=900@8p2!H?x?2y0-RgN>^8j_QFHj1q{dZFoe#Xw{D-Oe|CF&yYb&H1UxW*o%U
z;<ugE@4L@C&H33x@U*qqbx1VRnw?z}?|GM>T2H5RT<3)ssolo9%D^5|2Qm61hb+Xf
znJY&g&voU9!wXH<rYfD4Se&fUm`8;zg|26bLC^Efsu=hDj?!Rg4Wb&;vzCPMmghug
zMY1_hnp(ovMAM{D9i3G<<kePlUe-0)oDwH0&)Q_u7kNeT=KS;;`<klbfbyUtF@oWA
z+!ZG>J5j8b&eN@I{3Zm9ojGrEMo#l+qTyZz>#tmA^`HEn)nB^#hrhl*TEFzi!bsrO
zi>&_g^$%{a`j6kgDZVcquD|?VVdRN8@ajY3^`G4=jEo&@OdVt0$Xo0<_Ef#3efJ+G
z2L1f>Yj3d6>HUARGhc5Vo11y2N#jua#Rzp6iMA6}%Z4V;KWS=_mh1oD<hS^ny&-|b
zNE|=_1V8`;KmY_l00ck)1V8`;KmY{xhCoAABUTn9)#;<!>QBhjhK1rtYZw?CPx9-z
LDEg?&>Gb~q$g8fD

diff --git a/crates/milli/.tmp4e121b/lock.mdb b/crates/milli/.tmp4e121b/lock.mdb
deleted file mode 100644
index b4ab0527083cbd1fe238516d912af88bb8b62101..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 8128
zcmeIvu?Ye(7>41Xjflm8jfI7UIDsQ*BZ>&Rha=n$f;P?~S2%|&xPYLD5&q^aH0_%v
z9N+ho<hfgOzqIOpr=32lm-k~;udb*0A%Eibotq6(I#02N!85OSVZ6l}262A5#u^53
zzS&_7gE&7deE(>I$6yf8uNGLtAkObMSi>OBKe|}MAkLTGzx4nAb3Q)IKmi35P(T3%
h6i`3`1r$&~0R<FLKmi35P(T3%6i`3`1r+#E-~}ggW}^TA

diff --git a/crates/milli/.tmpNxMsye/data.mdb b/crates/milli/.tmpNxMsye/data.mdb
deleted file mode 100644
index ea920733de21449c828d398e7d75f3a895a2cebb..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 69632
zcmeI5UuYc19mnTRmVLU@?Oarms<tu5VNx(!ai>4FdiWt2mloQp4>h6ni&@|9satDz
zkKH}z<W!Ibmy{BMC?QZN!IakIp$~lsc__v-f9e*ThfoUjg9)J+YVyz)(}zBkw6pWu
z+1owa(w*e04*u%lcIUUVzxmB)=2rKc-^?sy>aQSAKKkw7ujiCV9ycgeVXVOVSNdmk
zS*7=9?p0Ejx|1@@#QFBcM|EF5b3YOw00JNY0w4eaAOHd&00JNY0w4eakAgtoL*JeM
zul#l~OpyBPYww@YW##{Cac*_rSn6(^f&d7B00@8p2!H?xfB*=900@8p2s|1B%5P^Z
z&z@!13fBsEwH0li-!2}RJXw6Dc(L?h=~(HLiPtBl%J1_}3#TSt<YxKb<zJWI(7rc$
zee%&*0WBZ^0w4eaAOHd&FdTup$d+dl%=B7a$8y79xZ4=}PyEM7!z8<CwL;I=4d3_H
zM^!>P4k;a$bKWvd%hy{*Xsvqwx^A1Js*LSA%-XiKW(ozmsG|668!ao;gV49#)sZVp
zc7EZ(I@`w=M^+eHIm5^-VV@o7meUE>$=s0@l7*ko=ySVkYdcgI+xK)|pVhO2Fxa6w
zvhH(zZPX5WI|z5!zO}wC*Yh1?&Aw!rar5r3l~Z+&t&|;Thb@w-v*;%~#22^5A@;oI
zn|jB$+ICOR_V1C+d!Wva7uccgxx2ITVE=m}4Q_c|H{790>N(R?^IcXsqidH{C0z&O
z7BOsJG>g}>9ebDG;2CE1j8>?pTY5MBZz|JgcBAR3v}HR}lsr|eUTFEQA)ik4??*hS
zNAAMZ1|_QDt}NQe9K&|$QPhvN_s_CdRGeKMqyMB+><+)fhkckK9|(W|2!H?xfB*=9
z00@8p2!H?xfWYHKfP(Y~#0a$?h=DO-S~My^j=oxdkNE%N)MBs&1V8`;KmY_l;9&$P
zc7pi-!}yR~O?Dt!i2o!0zjc3!u4elT;{T?O`2SYDCfi>S|KExgUCR>NyZFC~_m9UW
zs$=B;Kg}-l%ls`l2LSW`w{Jt_009sH0T2KI5ZHSH@c)Nlp8cf>{~!MU_Pr+CXW;+C
z|KGaLWcv&J|E+07SJT9f$m~Vd@-4Au%+!srHea7#((A|d#^Ra!a&zfKb75ibL}O|G
zON)zVYbt-YV-A&ntN6d#cc13}-{QA0|9|_&Lk<uC0T2KI5C8!X009sH0T2KI5O_=o
z(Ej=b5!I(ZbyV?xl|GQz8?FDp%CExzPl(_I1V8`;KmY_l00ck)1V8`;KmY{xmcWWE
zPOa6?HCXN|wbPbe4aBasR<&&h7Yx^`TA@)jBdPP=FROof@im!PvsW#@+I7X2wAH|K
zs*ddjVYU6<+rG#(SEIN7Md~B}{~o`G`Tu*{v?vV(KmY_l00cl_Bm$WKKMbz)Ow&BW
z{C~{<-@eym`wZs)WB&iveJ0yqF#msRTG7=sF*TomZ|48gC_;`kq>&Sesd@GcJI#N^
zf5y*nlYhiN;5~kt|AYUA|Cr<be+ed>fB*=900@8p2>f3O!2cfxfBKgu{C~XvZ}dI`
z{~!MU*1abCjCs=hfAXluSl}2ElS+$td@6lFyirod=~J?CCd2WO#G|{$nGDsZ@{BPN
z(@&+9=dSY0IRL5a<4lJ9Gm>`ndeM|b*y=elU6h~EN96y{34i}p{(XLfU*X^7r^H-<
z=lE?thcy8SC7gf&2!H?xfB*=900@8p2!H?xJP8EI=PoE8J$j8EQ?l5TPs#R^mz|On
zly9Dr?PqeFAS00;e@aqdu?L@$3GI<jpOO^&BMd2NfyE;PDcQa_`p;M?X|_B&#)=~9
z|2Glwzs=v|-{vkq%wOY0{*ssnP{$ZRLJB7!00JNY0w4eaAOHd&00JNY0-t39<j2Qe
zwR|m}^6cYyy4)&H$>J!zTt2U4_xsr?k6doJr*g*LymY=v7LOIk>G&#FJfa{&^eUGe
zE0CM`DTy2_kO`9XefvxfxzT?{A{&d|4G^8?*);oD#Q#5rr~g^*#;_d(KmY_l00ck)
z1V8`;KmY_l;E5za9={kc$dRX?q-o*+D<@R)?5k|Ee3Uo2tDV*E@)iCsZIi#PUDkfV
zKPi8peOj)FnE|ufX8Di&5<jK2`5kRqyIQ_p{!{r4Em4CL5C8!X009sH0T2KI5C8!X
z0D++hkbhQW`Dk)?vXGmklWdG6$)a(Udo0SGEFAAkqwV3<@^O-iH^M~$<9JEA$|+?7
z_lwfhg7tV|dy<>1h?kI)bdu%i{sG$xEEk2W2g=Ju>15$LY0y}XB&t-wOX(gX-N`a`
zWl1iwVW31|Nu>QgEd@3vOQnVHN^_3x9?&c(MOF{gTUtHPCZc{)D=n{2v{J4CqOk=s
zgs<urwX|%dj8tfi%8oQA6pAcI`UbhHnhxa7NBIWoHOX=+cU)fIanG}7*|oy8!d-1e
zo9DNSM<!1eUnyQJeONkH!v23leR+`y1V8`;KmY_l;D1iwA^ZQyb&j&|{u$Y#(#cDY
zNy8+&Xtl(TFx~KdZ+%#^vwL10QaUW>yk&})E$J;Iv{pTTUAN6qRWh>cu=o*ct(iiB
zZh9^8**02Ms0X2MyQ{&dN=Ya7|HuCS+xMJo-^Tub58D4vJxt;y*eR&$-Rf#?HGT7@
z+4G(+zQdWXbo?!TxGuFmTkX@9(be$+JG4EQ9uM2<Qn6m8lXM*<yE=yL>mA?g*^V8q
z52^8RTx+G7G(Ktj|9P#hW4U1<9@N{mfof^>-aR5#qBf9E`E6CG=1aEjCms8kW7sY=
zh$`j&zNz|=n&hvJ(ft3T>`M79epI_s{<`+6*3<r3zQY&!z5WnEs*mFo1V8`;KmY_l
z00ck)1V8`;K;Y35I4RX}CR{^uGS3B}5e6^R+^!gC^uO8uvMYuIt)SLy%+^9LG}gXm
zt$)kDWYwC>vEVslt!t5JvA(>#!PNWax*b!DQJT6D*5>Q;OM3md-dH?SUv4g)Xf7<w
zomgI8TwY!{Tbq(Ll>0VBj&4(UoyyIuP@SKW2QgX{4RMHJHai-rJl~B*C|;=9CYfmU
zLUFP|V<aWk@omQtgQOR%bumu*4XHs>8bmR=V{}Ai4aW-3iDa!wn(Cr%v8Gv}IyfhD
zL|5yzW>nT}tszcio{ia>EAoolYt8u$_C;C7l=Qd*G2&sh?Vc6r?I4uP>ZvM&pM-$1
zGZ%Eu$jP3FH9Y9X%8%Y<l^?#tDlgsq-Cu7^R5pK~8~5x=o>g9c_s@S|l^@)=DLyxk
zR9=2RH~viMIhC=Q%1>_Q#-|Tf8%wMkIBRXoY*Y%$*Z)pz&`(!h|9R4W!@e(&#-R>~
z5o$jWZ6Q=e4M(1T+;k!xt^a?M-x53j#bx0D0w4eaAOHd&00JNY0w4eaAOHd&u*U?d
kvOKYZATC!O<+gr8CO0e;M_Pfv*i4*X%|%g1b)QQA7g+hN<^TWy

diff --git a/crates/milli/.tmpNxMsye/lock.mdb b/crates/milli/.tmpNxMsye/lock.mdb
deleted file mode 100644
index abe89541a2a8b5633c06e4203f2fe5fac9ab0262..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 8128
zcmeH@AqoOf7zP(DCPBTzA|_EViNR<Rn-?&MCop;tZy}gO_5d5iym$qR!KR2S{Dvtu
zVP1JWX1*8Zl3kCLO{-Q--ltyPSw1-JcdK*x#Ot%Wb(7@KM-9QFJTGA}L=8dQPui#<
zi2HPk8iKgr&QU`U_q7x?1aY5_P($#w@1TYt?u!O$2;zR-L=8dQ7c<ll#Qm~|8iKgL
zk5NMq_xTU~@neAnEMNf(Sik}nuz&?DU;ztQzycPqfCVgI0Sj2b0v51<1uXD)1)hh7
BeAxg1

diff --git a/crates/milli/src/search/new/matches/adjust_indices.rs b/crates/milli/src/search/new/matches/adjust_indices.rs
index b7d9ad793..6c5df2ddf 100644
--- a/crates/milli/src/search/new/matches/adjust_indices.rs
+++ b/crates/milli/src/search/new/matches/adjust_indices.rs
@@ -33,8 +33,8 @@ fn get_adjusted_indices_for_too_few_words(
     let mut is_index_backwards_at_hard_separator = false;
     let mut is_index_forwards_at_hard_separator = false;
 
-    // false + ends reached because TODO
-    let mut is_crop_size_or_both_ends_reached = is_end_reached && is_beginning_reached;
+    let mut is_crop_size_or_both_ends_reached =
+        words_count == crop_size || (is_end_reached && is_beginning_reached);
 
     let mut dir = Direction::Forwards;
 
@@ -108,18 +108,38 @@ fn get_adjusted_indices_for_too_few_words(
         // 2. if forwards index reached a hard separator and backwards is currently hard, we can go backwards
     }
 
-    // keep advancing forward to check if there's only separator tokens left until the end
-    // if so, then include those too in the index range
-    let mut try_index_forward = valid_index_forward + 1;
-    while let Some(token) = tokens.get(try_index_forward) {
-        if !token.is_separator() {
-            return [valid_index_backward, valid_index_forward];
+    // Keep advancing forward and backward to check whether only separator tokens are left
+    // until the respective end of the text; if so, include those in the index range too.
+
+    let saved_index = valid_index_forward;
+    loop {
+        if valid_index_forward == tokens.len() - 1 {
+            break;
         }
 
-        try_index_forward += 1;
+        valid_index_forward += 1;
+
+        if !tokens[valid_index_forward].is_separator() {
+            valid_index_forward = saved_index;
+            break;
+        }
     }
 
-    [valid_index_backward, try_index_forward - 1]
+    let saved_index = valid_index_backward;
+    loop {
+        if valid_index_backward == 0 {
+            break;
+        }
+
+        valid_index_backward -= 1;
+
+        if !tokens[valid_index_backward].is_separator() {
+            valid_index_backward = saved_index;
+            break;
+        }
+    }
+
+    [valid_index_backward, valid_index_forward]
 }
 
 fn get_adjusted_index_forward_for_too_many_words(
@@ -158,14 +178,13 @@ pub fn get_adjusted_indices_for_highlights_and_crop_size(
     crop_size: usize,
 ) -> [usize; 2] {
     match words_count.cmp(&crop_size) {
-        Ordering::Less => get_adjusted_indices_for_too_few_words(
+        Ordering::Equal | Ordering::Less => get_adjusted_indices_for_too_few_words(
             tokens,
             index_backward,
             index_forward,
             words_count,
             crop_size,
         ),
-        Ordering::Equal => [index_backward, index_forward],
         Ordering::Greater => [
             index_backward,
             get_adjusted_index_forward_for_too_many_words(
diff --git a/crates/milli/src/search/new/matches/matching_words.rs b/crates/milli/src/search/new/matches/matching_words.rs
index ab7f90f05..3edc3eb38 100644
--- a/crates/milli/src/search/new/matches/matching_words.rs
+++ b/crates/milli/src/search/new/matches/matching_words.rs
@@ -247,12 +247,22 @@ impl MatchingWords {
         // TODO: There is potentially an optimization to be made here
         // if we matched a term then we can skip checking it for further iterations?
 
+        println!(
+            "{:?}",
+            self.located_matching_words
+                .iter()
+                .flat_map(|lw| lw.value.iter().map(move |w| (
+                    lw.is_prefix,
+                    lw.original_char_count,
+                    self.word_interner.get(*w)
+                )))
+                .collect::<Vec<_>>()
+        );
+
         self.located_matching_words
             .iter()
-            .flat_map(|lw| lw.value.iter().map(move |w| (lw, w)))
+            .flat_map(|lw| lw.value.iter().map(move |w| (lw, self.word_interner.get(*w))))
             .find_map(|(located_words, word)| {
-                let word = self.word_interner.get(*word);
-
                 let [char_count, byte_len] =
                     match PrefixedOrEquality::new(tph.token.lemma(), word, located_words.is_prefix)
                     {
@@ -368,93 +378,105 @@ impl Debug for MatchingWords {
     }
 }
 
-// #[cfg(test)]
-// pub(crate) mod tests {
-//     use super::super::super::located_query_terms_from_tokens;
-//     use super::*;
-//     use crate::search::new::matches::tests::temp_index_with_documents;
-//     use crate::search::new::query_term::ExtractedTokens;
-//     use charabia::{TokenKind, TokenizerBuilder};
-//     use std::borrow::Cow;
+#[cfg(test)]
+mod tests {
+    use super::super::super::located_query_terms_from_tokens;
+    use super::*;
+    use crate::index::tests::TempIndex;
+    use crate::search::new::query_term::ExtractedTokens;
+    use charabia::{TokenKind, TokenizerBuilder};
+    use std::borrow::Cow;
 
-//     #[test]
-//     fn matching_words() {
-//         let temp_index = temp_index_with_documents(None);
-//         let rtxn = temp_index.read_txn().unwrap();
-//         let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap();
-//         let mut builder = TokenizerBuilder::default();
-//         let tokenizer = builder.build();
-//         let text = "split this world";
-//         let tokens = tokenizer.tokenize(text);
-//         let ExtractedTokens { query_terms, .. } =
-//             located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap();
-//         let matching_words = MatchingWords::new(ctx, &query_terms);
+    fn temp_index_with_documents() -> TempIndex {
+        let temp_index = TempIndex::new();
+        temp_index
+            .add_documents(documents!([
+                { "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
+                { "id": 2, "name": "Westfália" },
+                { "id": 3, "name": "Ŵôřlḑôle" },
+            ]))
+            .unwrap();
+        temp_index
+    }
 
-//         assert_eq!(
-//             matching_words.get_matches_and_query_positions(
-//                 &[
-//                     Token {
-//                         kind: TokenKind::Word,
-//                         lemma: Cow::Borrowed("split"),
-//                         char_end: "split".chars().count(),
-//                         byte_end: "split".len(),
-//                         ..Default::default()
-//                     },
-//                     Token {
-//                         kind: TokenKind::Word,
-//                         lemma: Cow::Borrowed("nyc"),
-//                         char_end: "nyc".chars().count(),
-//                         byte_end: "nyc".len(),
-//                         ..Default::default()
-//                     },
-//                     Token {
-//                         kind: TokenKind::Word,
-//                         lemma: Cow::Borrowed("world"),
-//                         char_end: "world".chars().count(),
-//                         byte_end: "world".len(),
-//                         ..Default::default()
-//                     },
-//                     Token {
-//                         kind: TokenKind::Word,
-//                         lemma: Cow::Borrowed("worlded"),
-//                         char_end: "worlded".chars().count(),
-//                         byte_end: "worlded".len(),
-//                         ..Default::default()
-//                     },
-//                     Token {
-//                         kind: TokenKind::Word,
-//                         lemma: Cow::Borrowed("thisnew"),
-//                         char_end: "thisnew".chars().count(),
-//                         byte_end: "thisnew".len(),
-//                         ..Default::default()
-//                     }
-//                 ],
-//                 text
-//             ),
-//             (
-//                 vec![
-//                     Match {
-//                         char_count: 5,
-//                         byte_len: 5,
-//                         position: MatchPosition::Word { word_position: 0, token_position: 0 }
-//                     },
-//                     Match {
-//                         char_count: 5,
-//                         byte_len: 5,
-//                         position: MatchPosition::Word { word_position: 2, token_position: 2 }
-//                     },
-//                     Match {
-//                         char_count: 5,
-//                         byte_len: 5,
-//                         position: MatchPosition::Word { word_position: 3, token_position: 3 }
-//                     }
-//                 ],
-//                 vec![
-//                     QueryPosition { range: [0, 0], index: 0 },
-//                     QueryPosition { range: [2, 2], index: 1 },
-//                     QueryPosition { range: [2, 2], index: 2 }
-//                 ]
-//             )
-//         );
-//     }
-// }
+    #[test]
+    fn matching_words() {
+        let temp_index = temp_index_with_documents();
+        let rtxn = temp_index.read_txn().unwrap();
+        let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap();
+        let mut builder = TokenizerBuilder::default();
+        let tokenizer = builder.build();
+        let text = "split this world";
+        let tokens = tokenizer.tokenize(text);
+        let ExtractedTokens { query_terms, .. } =
+            located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap();
+        let matching_words = MatchingWords::new(ctx, &query_terms);
+
+        assert_eq!(
+            matching_words.get_matches_and_query_positions(
+                &[
+                    Token {
+                        kind: TokenKind::Word,
+                        lemma: Cow::Borrowed("split"),
+                        char_end: "split".chars().count(),
+                        byte_end: "split".len(),
+                        ..Default::default()
+                    },
+                    Token {
+                        kind: TokenKind::Word,
+                        lemma: Cow::Borrowed("nyc"),
+                        char_end: "nyc".chars().count(),
+                        byte_end: "nyc".len(),
+                        ..Default::default()
+                    },
+                    Token {
+                        kind: TokenKind::Word,
+                        lemma: Cow::Borrowed("world"),
+                        char_end: "world".chars().count(),
+                        byte_end: "world".len(),
+                        ..Default::default()
+                    },
+                    Token {
+                        kind: TokenKind::Word,
+                        lemma: Cow::Borrowed("worlded"),
+                        char_end: "worlded".chars().count(),
+                        byte_end: "worlded".len(),
+                        ..Default::default()
+                    },
+                    Token {
+                        kind: TokenKind::Word,
+                        lemma: Cow::Borrowed("thisnew"),
+                        char_end: "thisnew".chars().count(),
+                        byte_end: "thisnew".len(),
+                        ..Default::default()
+                    }
+                ],
+                text
+            ),
+            (
+                vec![
+                    Match {
+                        char_count: 5,
+                        byte_len: 5,
+                        position: MatchPosition::Word { word_position: 0, token_position: 0 }
+                    },
+                    Match {
+                        char_count: 5,
+                        byte_len: 5,
+                        position: MatchPosition::Word { word_position: 2, token_position: 2 }
+                    },
+                    Match {
+                        char_count: 5,
+                        byte_len: 5,
+                        position: MatchPosition::Word { word_position: 3, token_position: 3 }
+                    }
+                ],
+                vec![
+                    QueryPosition { range: [0, 0], index: 0 },
+                    QueryPosition { range: [2, 2], index: 1 },
+                    QueryPosition { range: [2, 2], index: 2 }
+                ]
+            )
+        );
+    }
+}
diff --git a/crates/milli/src/search/new/matches/mod.rs b/crates/milli/src/search/new/matches/mod.rs
index bab82da8c..f47582af7 100644
--- a/crates/milli/src/search/new/matches/mod.rs
+++ b/crates/milli/src/search/new/matches/mod.rs
@@ -200,7 +200,7 @@ mod tests {
         format_options: Option<FormatOptions>,
         text: &str,
         query: &str,
-        expected_text: &str,
+        expected_maybe_text: Option<&str>,
     ) {
         let temp_index = TempIndex::new();
 
@@ -216,7 +216,28 @@ mod tests {
         let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query);
         let mut matcher = builder.build(text, None);
 
-        assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
+        assert_eq!(
+            matcher.get_formatted_text(format_options),
+            expected_maybe_text.map(|v| v.to_string())
+        );
+    }
+
+    struct FormatVariations<'a> {
+        highlight_with_crop: Option<&'a str>,
+        highlight: Option<&'a str>,
+        crop: Option<&'a str>,
+    }
+
+    impl<'a> FormatVariations<'a> {
+        fn get(&self) -> [(Option<FormatOptions>, Option<&'a str>); 5] {
+            [
+                (None, None),
+                (Some(FormatOptions { highlight: true, crop: Some(2) }), self.highlight_with_crop),
+                (Some(FormatOptions { highlight: true, crop: None }), self.highlight),
+                (Some(FormatOptions { highlight: false, crop: Some(2) }), self.crop),
+                (Some(FormatOptions { highlight: false, crop: None }), None),
+            ]
+        }
     }
 
     /// "Dei store fiskane eta dei små — dei liger under som minst förmå."
@@ -225,77 +246,66 @@ mod tests {
     fn rename_me_with_base_text(
         format_options: Option<FormatOptions>,
         query: &str,
-        expected_text: &str,
+        expected_maybe_text: Option<&str>,
     ) {
         rename_me(
             format_options,
             "Dei store fiskane eta dei små — dei liger under som minst förmå.",
             query,
-            expected_text,
+            expected_maybe_text,
         );
     }
 
     #[test]
-    fn phrase_highlight_bigger_than_crop() {
-        rename_me_with_base_text(
-            Some(FormatOptions { highlight: true, crop: Some(1) }),
-            "\"dei liger\"",
-            "…<em>dei</em>…",
-        );
+    fn empty_query() {
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("Dei store…"),
+            highlight: None,
+            crop: Some("Dei store…"),
+        }
+        .get())
+        {
+            rename_me_with_base_text(format_options, "", expected_maybe_text);
+        }
     }
 
     #[test]
-    fn phrase_highlight_same_size_as_crop() {
-        rename_me_with_base_text(
-            Some(FormatOptions { highlight: true, crop: Some(2) }),
-            "\"dei liger\"",
-            "…<em>dei liger</em>…",
-        );
-    }
-
-    #[test]
-    fn phrase_highlight_crop_middle() {
-        rename_me_with_base_text(
-            Some(FormatOptions { highlight: true, crop: Some(4) }),
-            "\"dei liger\"",
-            "…små — <em>dei liger</em> under…",
-        );
-    }
-
-    #[test]
-    fn phrase_highlight_crop_end() {
-        rename_me_with_base_text(
-            Some(FormatOptions { highlight: true, crop: Some(4) }),
-            "\"minst förmå\"",
-            "…under som <em>minst förmå</em>.",
-        );
-    }
-
-    #[test]
-    fn phrase_highlight_crop_beginning() {
-        rename_me_with_base_text(
-            Some(FormatOptions { highlight: true, crop: Some(4) }),
-            "\"Dei store\"",
-            "<em>Dei store</em> fiskane eta…",
-        );
+    fn only_separators() {
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some(":-…"),
+            highlight: None,
+            crop: Some(":-…"),
+        }
+        .get())
+        {
+            rename_me(format_options, ":-)", ":-)", expected_maybe_text);
+        }
     }
 
     #[test]
     fn highlight_end() {
-        rename_me_with_base_text(
-            Some(FormatOptions { highlight: true, crop: None }),
-            "minst förmå",
-            "Dei store fiskane eta dei små — dei liger under som <em>minst</em> <em>förmå</em>.",
-        );
+        // TODO: Why is "förmå" marked as prefix in located matching words?
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("…<em>minst</em> <em>förmå</em>."),
+            highlight: Some("Dei store fiskane eta dei små — dei liger under som <em>minst</em> <em>förmå</em>."),
+            crop: Some("…minst förmå."),
+        }
+        .get()) {
+            rename_me_with_base_text(format_options, "minst förmå", expected_maybe_text);
+        }
     }
 
     #[test]
     fn highlight_beginning_and_middle() {
-        rename_me_with_base_text(
-            Some(FormatOptions { highlight: true, crop: None }),
-            "Dei store",
-            "<em>Dei</em> <em>store</em> fiskane eta <em>dei</em> små — <em>dei</em> liger under som minst förmå.",
-        );
+        // TODO: Why is "store" marked as prefix in located matching words?
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("<em>Dei</em> <em>store</em>…"),
+            highlight: Some("<em>Dei</em> <em>store</em> fiskane eta <em>dei</em> små — <em>dei</em> liger under som minst förmå."),
+            crop: Some("Dei store…"),
+        }
+        .get()) {
+            rename_me_with_base_text(format_options, "Dei store", expected_maybe_text);
+        }
     }
 
     #[test]
@@ -306,291 +316,185 @@ mod tests {
         // `milli::search::new::query_term::QueryTerm::all_computed_derivations` might be at fault here
 
         // interned words = ["forma"]
-        rename_me(
-            Some(FormatOptions { highlight: true, crop: None }),
-            "altså, förmå, på en måte",
-            "fo",
-            "altså, <em>förmå</em>, på en måte",
-        );
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("…<em>förmå</em>, på…"),
+            highlight: Some("altså, <em>förmå</em>, på en måte"),
+            crop: Some("…förmå, på…"),
+        }
+        .get())
+        {
+            rename_me(format_options, "altså, förmå, på en måte", "fo", expected_maybe_text);
+        }
 
         // interned words = ["fo", "forma"]
-        rename_me(
-            Some(FormatOptions { highlight: true, crop: None }),
-            "altså, fo förmå, på en måte",
-            "fo",
-            "altså, <em>fo</em> <em>fö</em>rmå, på en måte",
-        );
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("…<em>fo</em> <em>fö</em>rmå…"),
+            highlight: Some("altså, <em>fo</em> <em>fö</em>rmå, på en måte"),
+            crop: Some("…fo förmå…"),
+        }
+        .get())
+        {
+            rename_me(format_options, "altså, fo förmå, på en måte", "fo", expected_maybe_text);
+        }
     }
 
     #[test]
     fn partial_match_end() {
-        rename_me(
-            Some(FormatOptions { highlight: true, crop: None }),
-            "förmå, på en måte",
-            "fo",
-            "<em>förmå</em>, på en måte",
-        );
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("<em>förmå</em>, på…"),
+            highlight: Some("<em>förmå</em>, på en måte"),
+            crop: Some("förmå, på…"),
+        }
+        .get())
+        {
+            rename_me(format_options, "förmå, på en måte", "fo", expected_maybe_text);
+        }
 
-        rename_me(
-            Some(FormatOptions { highlight: true, crop: None }),
-            "fo förmå, på en måte",
-            "fo",
-            "<em>fo</em> <em>fö</em>rmå, på en måte",
-        );
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("<em>fo</em> <em>fö</em>rmå…"),
+            highlight: Some("<em>fo</em> <em>fö</em>rmå, på en måte"),
+            crop: Some("fo förmå…"),
+        }
+        .get())
+        {
+            rename_me(format_options, "fo förmå, på en måte", "fo", expected_maybe_text);
+        }
     }
 
     #[test]
     fn partial_match_beginning() {
-        rename_me(
-            Some(FormatOptions { highlight: true, crop: None }),
-            "altså, förmå",
-            "fo",
-            "altså, <em>förmå</em>",
-        );
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("altså, <em>förmå</em>"),
+            highlight: Some("altså, <em>förmå</em>"),
+            crop: Some("altså, förmå"),
+        }
+        .get())
+        {
+            rename_me(format_options, "altså, förmå", "fo", expected_maybe_text);
+        }
 
-        rename_me(
-            Some(FormatOptions { highlight: true, crop: None }),
-            "altså, fo förmå",
-            "fo",
-            "altså, <em>fo</em> <em>fö</em>rmå",
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("…<em>fo</em> <em>fö</em>rmå"),
+            highlight: Some("altså, <em>fo</em> <em>fö</em>rmå"),
+            crop: Some("…fo förmå"),
+        }
+        .get())
+        {
+            rename_me(format_options, "altså, fo förmå", "fo", expected_maybe_text);
+        }
+    }
+
+    #[test]
+    fn separator_at_end() {
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("…<em>minst</em> förmå. , ;"),
+            highlight: Some("; , — dei liger under som <em>minst</em> förmå. , ;"),
+            crop: Some("…minst förmå. , ;"),
+        }
+        .get())
+        {
+            rename_me(
+                format_options,
+                "; , — dei liger under som minst förmå. , ;",
+                "minst",
+                expected_maybe_text,
+            );
+        }
+    }
+
+    #[test]
+    fn separator_at_beginning() {
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("; , — <em>dei</em> liger…"),
+            highlight: Some("; , — <em>dei</em> liger under som minst förmå. , ;"),
+            crop: Some("; , — dei liger…"),
+        }
+        .get())
+        {
+            rename_me(
+                format_options,
+                "; , — dei liger under som minst förmå. , ;",
+                "dei",
+                expected_maybe_text,
+            );
+        }
+    }
+
+    #[test]
+    fn phrase() {
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("…<em>dei liger</em>…"),
+            highlight: Some(
+                "Dei store fiskane eta dei små — <em>dei liger</em> under som minst förmå.",
+            ),
+            crop: Some("…dei liger…"),
+        }
+        .get())
+        {
+            rename_me_with_base_text(format_options, "\"dei liger\"", expected_maybe_text);
+        }
+    }
+
+    #[test]
+    fn phrase_highlight_bigger_than_crop() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: true, crop: Some(1) }),
+            "\"dei liger\"",
+            Some("…<em>dei</em>…"),
         );
     }
 
-    // #[test]
-    // fn format_identity() {
-    //     let temp_index = temp_index_with_documents(None);
-    //     let rtxn = temp_index.read_txn().unwrap();
-    //     let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
-    //     let format_options = Some(FormatOptions { highlight: false, crop: None });
+    #[test]
+    fn phrase_bigger_than_crop() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: false, crop: Some(1) }),
+            "\"dei liger\"",
+            Some("…dei…"),
+        );
+    }
 
-    //     let test_values = [
-    //         // Text without any match.
-    //         "A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
-    //         // Text containing all matches.
-    //         "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
-    //         // Text containing some matches.
-    //         "Natalie risk her future to build a world with the boy she loves."
-    //     ];
+    #[test]
+    fn phrase_highlight_crop_middle() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: true, crop: Some(4) }),
+            "\"dei liger\"",
+            Some("…små — <em>dei liger</em> under…"),
+        );
+    }
 
-    //     for text in test_values {
-    //         let mut matcher = builder.build(text, None);
-    //         // no crop and no highlight should return complete text.
-    //         assert_eq!(matcher.get_formatted_text(format_options), None);
-    //     }
-    // }
+    #[test]
+    fn phrase_crop_middle() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: false, crop: Some(4) }),
+            "\"dei liger\"",
+            Some("…små — dei liger under…"),
+        );
+    }
 
-    // #[test]
-    // fn format_highlight() {
-    //     let temp_index = temp_index_with_documents(None);
-    //     let rtxn = temp_index.read_txn().unwrap();
-    //     let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
-    //     let format_options = Some(FormatOptions { highlight: true, crop: None });
+    #[test]
+    fn phrase_highlight_crop_end() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: true, crop: Some(4) }),
+            "\"minst förmå\"",
+            Some("…under som <em>minst förmå</em>."),
+        );
+    }
 
-    //     let test_values = [
-    //         // empty text.
-    //         ["", ""],
-    //         // text containing only separators.
-    //         [":-)", ":-)"],
-    //         // Text without any match.
-    //         ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
-    //          "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"],
-    //         // Text containing all matches.
-    //         ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
-    //          "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."],
-    //         // Text containing some matches.
-    //         ["Natalie risk her future to build a world with the boy she loves.",
-    //          "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves."],
-    //     ];
+    #[test]
+    fn phrase_crop_end() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: false, crop: Some(4) }),
+            "\"minst förmå\"",
+            Some("…under som minst förmå."),
+        );
+    }
 
-    //     for [text, expected_text] in test_values {
-    //         let mut matcher = builder.build(text, None);
-    //         // no crop should return complete text with highlighted matches.
-    //         assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
-    //     }
-    // }
-
-    // #[test]
-    // fn highlight_unicode() {
-    //     let temp_index = temp_index_with_documents(None);
-    //     let rtxn = temp_index.read_txn().unwrap();
-    //     let format_options = Some(FormatOptions { highlight: true, crop: None });
-
-    //     let test_values = [
-    //         // Text containing prefix match.
-    //         ["world", "Ŵôřlḑôle", "<em>Ŵôřlḑ</em>ôle"],
-    //         // Text containing unicode match.
-    //         ["world", "Ŵôřlḑ", "<em>Ŵôřlḑ</em>"],
-    //         // Text containing unicode match.
-    //         ["westfali", "Westfália", "<em>Westfáli</em>a"],
-    //     ];
-
-    //     for [query, text, expected_text] in test_values {
-    //         let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query);
-    //         let mut matcher = builder.build(text, None);
-    //         // no crop should return complete text with highlighted matches.
-    //         assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
-    //     }
-    // }
-
-    // #[test]
-    // fn format_crop() {
-    //     let temp_index = temp_index_with_documents(None);
-    //     let rtxn = temp_index.read_txn().unwrap();
-    //     let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
-    //     let format_options = Some(FormatOptions { highlight: false, crop: Some(10) });
-
-    //     let test_values = [
-    //         // empty text.
-    //         // ["", ""],
-    //         // text containing only separators.
-    //         // [":-)", ":-)"],
-    //         // Text without any match.
-    //         ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
-    //          "A quick brown fox can not jump 32 feet, right…"],
-    //         // Text without any match starting by a separator.
-    //         ["(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)",
-    //          "(A quick brown fox can not jump 32 feet, right…" ],
-    //         // Test phrase propagation
-    //         ["Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.",
-    //          "…Split The World is a book written by Emily Henry…"],
-    //         // Text containing some matches.
-    //         ["Natalie risk her future to build a world with the boy she loves.",
-    //          "…future to build a world with the boy she loves."],
-    //         // Text containing all matches.
-    //         ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
-    //          "…she loves. Emily Henry: The Love That Split The World."],
-    //         // Text containing a match unordered and a match ordered.
-    //         ["The world split void void void void void void void void void split the world void void",
-    //          "…void void void void void split the world void void"],
-    //         // Text containing matches with different density.
-    //         ["split void the void void world void void void void void void void void void void split the world void void",
-    //          "…void void void void void split the world void void"],
-    //         ["split split split split split split void void void void void void void void void void split the world void void",
-    //          "…void void void void void split the world void void"]
-    //     ];
-
-    //     for [text, expected_text] in test_values {
-    //         let mut matcher = builder.build(text, None);
-    //         // no crop should return complete text with highlighted matches.
-    //         assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
-    //     }
-    // }
-
-    // #[test]
-    // fn format_highlight_crop() {
-    //     let temp_index = temp_index_with_documents(None);
-    //     let rtxn = temp_index.read_txn().unwrap();
-    //     let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
-    //     let format_options = Some(FormatOptions { highlight: true, crop: Some(10) });
-
-    //     let test_values = [
-    //         // empty text.
-    //         ["", ""],
-    //         // text containing only separators.
-    //         [":-)", ":-)"],
-    //         // Text without any match.
-    //         ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
-    //          "A quick brown fox can not jump 32 feet, right…"],
-    //         // Text containing some matches.
-    //         ["Natalie risk her future to build a world with the boy she loves.",
-    //          "…future to build a <em>world</em> with <em>the</em> boy she loves."],
-    //         // Text containing all matches.
-    //         ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
-    //          "…she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."],
-    //         // Text containing a match unordered and a match ordered.
-    //         ["The world split void void void void void void void void void split the world void void",
-    //          "…void void void void void <em>split</em> <em>the</em> <em>world</em> void void"]
-    //     ];
-
-    //     for [text, expected_text] in test_values {
-    //         let mut matcher = builder.build(text, None);
-    //         // no crop should return complete text with highlighted matches.
-    //         assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
-    //     }
-    // }
-
-    // #[test]
-    // fn format_highlight_crop_phrase_query() {
-    //     //! testing: https://github.com/meilisearch/meilisearch/issues/3975
-    //     let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
-    //     let temp_index = temp_index_with_documents(Some(documents!([
-    //         { "id": 1, "text": text }
-    //     ])));
-    //     let rtxn = temp_index.read_txn().unwrap();
-
-    //     let format_options = Some(FormatOptions { highlight: true, crop: Some(10) });
-
-    //     let test_values = [
-    //         // should return 10 words with a marker at the start as well the end, and the highlighted matches.
-    //         ["\"the world\"",
-    //          "…the power to split <em>the world</em> between those who embraced…"],
-    //         // should highlight "those" and the phrase "and those".
-    //         ["those \"and those\"",
-    //          "…world between <em>those</em> who embraced progress <em>and those</em> who resisted…"],
-    //         ["\"The groundbreaking invention had the power to split the world\"",
-    //          "<em>The groundbreaking invention had the power to split the world</em>…"],
-    //         ["\"The groundbreaking invention had the power to split the world between those\"",
-    //          "<em>The groundbreaking invention had the power to split the world</em>…"],
-    //         ["\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"",
-    //          "…between those who <em>embraced progress and those who resisted change</em>!"],
-    //         ["\"groundbreaking invention\" \"split the world between\"",
-    //          "…<em>groundbreaking invention</em> had the power to <em>split the world between</em>…"],
-    //         ["\"groundbreaking invention\" \"had the power to split the world between those\"",
-    //          "…<em>invention</em> <em>had the power to split the world between those</em>…"],
-    //     ];
-
-    //     for [query, expected_text] in test_values {
-    //         let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query);
-    //         let mut matcher = builder.build(text, None);
-
-    //         assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
-    //     }
-    // }
-
-    // #[test]
-    // fn smaller_crop_size() {
-    //     //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
-    //     let temp_index = temp_index_with_documents(None);
-    //     let rtxn = temp_index.read_txn().unwrap();
-    //     let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
-    //     let text = "void void split the world void void.";
-    //     let mut matcher = builder.build(text, None);
-
-    //     let test_values = [
-    //         // set a smaller crop size
-    //         // because crop size < query size, partially format matches.
-    //         (2, "…split the…"),
-    //         // set a smaller crop size
-    //         // because crop size < query size, partially format matches.
-    //         (1, "…split…"),
-    //         // set  crop size to 0
-    //         // because crop size is 0, crop is ignored.
-    //         (0, "void void split the world void void."),
-    //     ];
-
-    //     for (crop_size, expected_text) in test_values {
-    //         // set a smaller crop size
-    //         let format_options = Some(FormatOptions { highlight: false, crop: Some(crop_size) });
-    //         assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
-    //     }
-    // }
-
-    // #[test]
-    // fn partial_matches() {
-    //     let temp_index = temp_index_with_documents(None);
-    //     let rtxn = temp_index.read_txn().unwrap();
-    //     let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "the \"t he\" door \"do or\"");
-
-    //     let format_options = Some(FormatOptions { highlight: true, crop: None });
-
-    //     let text = "the do or die can't be he do and or isn't he";
-    //     let mut matcher = builder.build(text, None);
-    //     assert_eq!(
-    //         matcher.get_formatted_text(format_options),
-    //         Some(
-    //             "<em>the</em> <em>do or</em> die can't be he do and or isn'<em>t he</em>"
-    //                 .to_string()
-    //         )
-    //     );
-    // }
+    #[test]
+    fn phrase_highlight_crop_beginning() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: true, crop: Some(4) }),
+            "\"Dei store\"",
+            Some("<em>Dei store</em> fiskane eta…"),
+        );
+    }
 }