From 561b4836d8415fd3b1e0a93b90b4e9f74b58d41d Mon Sep 17 00:00:00 2001
From: "F. Levi" <55688616+flevi29@users.noreply.github.com>
Date: Sun, 15 Jun 2025 14:18:18 +0300
Subject: [PATCH] Refactor, fix, misc

---
 crates/milli/.tmp4e121b/data.mdb              | Bin 0 -> 69632 bytes
 crates/milli/.tmp4e121b/lock.mdb              | Bin 0 -> 8128 bytes
 crates/milli/.tmpNxMsye/data.mdb              | Bin 0 -> 69632 bytes
 crates/milli/.tmpNxMsye/lock.mdb              | Bin 0 -> 8128 bytes
 .../src/search/new/matches/adjust_indices.rs  | 245 ++++---
 .../src/search/new/matches/match_bounds.rs    |  60 +-
 .../src/search/new/matches/matching_words.rs  | 283 ++++----
 crates/milli/src/search/new/matches/mod.rs    | 667 ++++++++++--------
 crates/milli/src/search/new/query_term/mod.rs |   3 +-
 9 files changed, 689 insertions(+), 569 deletions(-)
 create mode 100644 crates/milli/.tmp4e121b/data.mdb
 create mode 100644 crates/milli/.tmp4e121b/lock.mdb
 create mode 100644 crates/milli/.tmpNxMsye/data.mdb
 create mode 100644 crates/milli/.tmpNxMsye/lock.mdb
diff --git a/crates/milli/.tmp4e121b/data.mdb b/crates/milli/.tmp4e121b/data.mdb
new file mode 100644
index 0000000000000000000000000000000000000000..f6705d4f179f23fcb1b8d7064e7e1f3d8fedf335
GIT binary patch
literal 69632
zcmeI5UuYc19mnTRl6|^AI~P}^sx8cMloX8CoK)GBgda?BX`s|C)Pyz<G3(nsv9)$r
z?CvR2f*=hp3B?blgg~JL6R62UA4&<K1Y=q!O$~;kx0qr=2&Ofqv?cVR525MI{C4(s
zC0n|a9M!?!d$`^C?aXg}`<c0we)F4|WlVoc>g2=U{qtHui_~$AQgy~ktaqh%MwfMZ
zU#_f{vP?<JFrMTaPL8@<F;^Z55C8!X009sH0T2KI5C8!X009sHfd@gL=b`V;|JQ!I
z7$(SkdfIzubXoiVMv_~X8_JZ%DF}c72!H?xfB*=900@8p2!H?xfWU(xp#65nitHr2
zTDn@gYn(Qw`R(%I>WT8j@^WRpa-{ON(bq-~)ZXL&Ej>B<BDZUw)_z@k-S|=UTJ^zL
z0WBZ^0w4eaAOHd&&>w-8$W~;d%nsUJ&++4^zhaDimV6j#sIq0J9S5Olg<-Hds1nj~
zNb7LCbB=90q1m=#XCVkzP1hb&Wn$L^>$uLMEfkpI7R7hRYCEwR#i8pj41BX>=U48n
zvvYiAV1<d5<BZG__PLSicuVmrnLDsTvhd3}eSUXwafj*>`yTJ<b5`6aj&`Vytou?=
z8?}SkiQ*l$Z?mV%4?@pcbf0(Zq<MGO%9(pktW+Inhb@x1XVFh~h%af4L+rUAw9Tc^
z>9{LqzJCvF-aU0L1(6%OfxkOD5B7d1%HVd;_2V6?q@FWIcfQM8&gt6at&*;TNsCx+
zD4Hc$aXojJzu<A^tXS>X%(nDy`rm<EpV^J3$Fi2~Oi}t&aaLj{^ey#tqCY?4K|OF6
zrZ%Y19q!7aVdhz`PmiKrw7qwhzoO&p`Y8XB9$<I)9p3N54EaC+1V8`;KmY_l00ck)
z1V8`;KmY_DCIS?s-!Dd}!$=H_3De}L06F@4{XOFU4^xZ577zdd5C8!X0D=1vpx6oG
z|M%lVay{9BXd(WO`2W`ZMPAMK8N~l>6Y>A8dQHB+ApXA<E4r2^ws-M=9q%7WOw>o|
z|3A(y@C*DcH3tCm|F>^L<NyH>009sH0T9@G0`UL)VV?g_6aGK^|LuECzR$q_hyTBI
zpUL+Z`2Snel2^0D4y){C)(#!9X3RFNxH;XLK4!L#n^QAqT66QQnfa-yqu-dFo12?C
z_Doaf?=IPW<=-m)ulL=j`Tw{0EzJMlzVVO)1V8`;KmY_l00ck)1V8`;KmY_D5(2cp
zen~|2>7$Q2{;$*fQ+wt5|110o{Qr~)PCx(zKmY_l00ck)1V8`;KmY_lU~dVWR@LdX
z`h_W0_*V0j<2E9(Ypv7hxY2pbcN$J?HEb#M-u-3c;}>65iA8t82^(EsY)RXQ0<Yn@
zeiS!4@4g+1Tt}Pq)<3CU`v0HsPcZ*~Z<`jifdB}A00@8p2n<93^Z)z7m7Qr?WSIYt
z`TyJZntY$Z{C~{<-@4D_`wQm(Z%s>H%@Wh|`S)i2KaC<3*pxDIG&Qxzo?xf=I{yVf
z!<YFr{vrP<KgIvWf5mN%_y47sZ~_7#00JNY0wC~zB>?}wAN<+>G~xf_{eOe^8TkM3
z|F`Zn`De_d=KqsNJ;Wl<QkZmF#N#vR5vGFh8OeU}j!6}{m$HCLjW89U&*U0nBBq~7
zk1!SWmy*wPnT~vYsFe65(cZnjt|kG<>x?AA*1%Kgvig=^(*HloPx9ORDE|$AU3mT9
z<Nx3n`3=5;H32CloPYobfB*=900@8p2!H?xfB*<Q3Ixb|-p7<@FJGdU_|wBlG+n1l
z+KbQRDY3*0&&iT8fsAY)`Q{l(i6#DhMpo{P9b}{>JvNY$?W6I7oJ5UOC`l!4wjw*i
z%KW$dC9wy<4gM<s0r&X?|2r@9KZ|()EsO!Aq;LWPAOHd&00JNY0w4eaAOHd&@I@w|
zeS75_tJmU@B=y_XR(V>M`0Z->yp|<#`HZKX@zT|Xdpc+0&nxGfWQmuq0BVUEWl$UT
zX^EN%pb{k6IPQ>WQJebfGAh!qruma39h*?0e|k57thUI;*hl;$G2{P>+={Rr1V8`;
zKmY_l00ck)1V8`;KmY_DMFQmW%RvM+ERoLtX9e>7Yblf>dx~AERr!C6N#jXloge3S
z#mxW9hHqTtS88t?f2lS38~kJAQtcgnjz4Xj=6^Cit&JJ$wRdW-7^xbZfB*=900@8p
z2!H?xfB*=900{I&fc&*GE6Ol*x@=oVZi_MioGb^|Ifp3hyh$p`rwh(?UR8dBw91C`
zGW28tJLMK~sy|(juC)|oUFjlpl9pIOhO2W{R9TVg+MvN;1uCuU6=c#nsYJS<J*`uQ
zu{Y=u`el8aw3OJ8vXGXzYYj!Zn`Gg8YPHHqi|2Jtb$^@L%lq3vs6e?k5KZb=;d^~&
zvPI=){j_vIO9eHlF-NV;3ZyS*skm2tpBr))Wxfr(s;r=M=d2KNVwmRdIqpSvl3gub
zE!{Ov8`J!D`Ed0_`C@swvR*k-!Tx`J{dkcH1V8`;KmY_l;BzN%pZ)(R&>?H@oslg%
zoxb##G*sEL(-!}nnN}DEtNogt-}CB_*5P>P99z6>$!uG(vk-)<rfUzXl962#;$K^5
z(H06!J7|mVj@5QzGm1mkUx)@(N;<LsKlcCMzUSooHunF!*ZzO{VY2x#qu;Hr=T_4%
zZ;U+`gyJ`x`$;Fi;`{5;>$CMfZ8=>_LFC46;M3z_TU|QVt96pDgJjo|<%Z@`7_7LS
z8?W}MaU$tTS~F>U)b{@i+Fj4_<48QHw`~L6()zu7M65z>pq}#Es?g1sZrx8hhM8x%
zJ~fE0Wpm%u{YX#p*GD=3|EuhB?E?RI?Q(6xc-DBzc#GfR!~A-0h+z8%2XcS_2!H?x
zfB*=900@8p2!H?x?2y0-RgN>^8j_QFHj1q{dZFoe#Xw{D-Oe|CF&yYb&H1UxW*o%U
z;<ugE@4L@C&H33x@U*qqbx1VRnw?z}?|GM>T2H5RT<3)ssolo9%D^5|2Qm61hb+Xf
znJY&g&voU9!wXH<rYfD4Se&fUm`8;zg|26bLC^Efsu=hDj?!Rg4Wb&;vzCPMmghug
zMY1_hnp(ovMAM{D9i3G<<kePlUe-0)oDwH0&)Q_u7kNeT=KS;;`<klbfbyUtF@oWA
z+!ZG>J5j8b&eN@I{3Zm9ojGrEMo#l+qTyZz>#tmA^`HEn)nB^#hrhl*TEFzi!bsrO
zi>&_g^$%{a`j6kgDZVcquD|?VVdRN8@ajY3^`G4=jEo&@OdVt0$Xo0<_Ef#3efJ+G
z2L1f>Yj3d6>HUARGhc5Vo11y2N#jua#Rzp6iMA6}%Z4V;KWS=_mh1oD<hS^ny&-|b
zNE|=_1V8`;KmY_l00ck)1V8`;KmY{xhCoAABUTn9)#;<!>QBhjhK1rtYZw?CPx9-z
LDEg?&>Gb~q$g8fD

literal 0
HcmV?d00001

diff --git a/crates/milli/.tmp4e121b/lock.mdb b/crates/milli/.tmp4e121b/lock.mdb
new file mode 100644
index 0000000000000000000000000000000000000000..b4ab0527083cbd1fe238516d912af88bb8b62101
GIT binary patch
literal 8128
zcmeIvu?Ye(7>41Xjflm8jfI7UIDsQ*BZ>&Rha=n$f;P?~S2%|&xPYLD5&q^aH0_%v
z9N+ho<hfgOzqIOpr=32lm-k~;udb*0A%Eibotq6(I#02N!85OSVZ6l}262A5#u^53
zzS&_7gE&7deE(>I$6yf8uNGLtAkObMSi>OBKe|}MAkLTGzx4nAb3Q)IKmi35P(T3%
h6i`3`1r$&~0R<FLKmi35P(T3%6i`3`1r+#E-~}ggW}^TA

literal 0
HcmV?d00001

diff --git a/crates/milli/.tmpNxMsye/data.mdb b/crates/milli/.tmpNxMsye/data.mdb
new file mode 100644
index 0000000000000000000000000000000000000000..ea920733de21449c828d398e7d75f3a895a2cebb
GIT binary patch
literal 69632
zcmeI5UuYc19mnTRmVLU@?Oarms<tu5VNx(!ai>4FdiWt2mloQp4>h6ni&@|9satDz
zkKH}z<W!Ibmy{BMC?QZN!IakIp$~lsc__v-f9e*ThfoUjg9)J+YVyz)(}zBkw6pWu
z+1owa(w*e04*u%lcIUUVzxmB)=2rKc-^?sy>aQSAKKkw7ujiCV9ycgeVXVOVSNdmk
zS*7=9?p0Ejx|1@@#QFBcM|EF5b3YOw00JNY0w4eaAOHd&00JNY0w4eakAgtoL*JeM
zul#l~OpyBPYww@YW##{Cac*_rSn6(^f&d7B00@8p2!H?xfB*=900@8p2s|1B%5P^Z
z&z@!13fBsEwH0li-!2}RJXw6Dc(L?h=~(HLiPtBl%J1_}3#TSt<YxKb<zJWI(7rc$
zee%&*0WBZ^0w4eaAOHd&FdTup$d+dl%=B7a$8y79xZ4=}PyEM7!z8<CwL;I=4d3_H
zM^!>P4k;a$bKWvd%hy{*Xsvqwx^A1Js*LSA%-XiKW(ozmsG|668!ao;gV49#)sZVp
zc7EZ(I@`w=M^+eHIm5^-VV@o7meUE>$=s0@l7*ko=ySVkYdcgI+xK)|pVhO2Fxa6w
zvhH(zZPX5WI|z5!zO}wC*Yh1?&Aw!rar5r3l~Z+&t&|;Thb@w-v*;%~#22^5A@;oI
zn|jB$+ICOR_V1C+d!Wva7uccgxx2ITVE=m}4Q_c|H{790>N(R?^IcXsqidH{C0z&O
z7BOsJG>g}>9ebDG;2CE1j8>?pTY5MBZz|JgcBAR3v}HR}lsr|eUTFEQA)ik4??*hS
zNAAMZ1|_QDt}NQe9K&|$QPhvN_s_CdRGeKMqyMB+><+)fhkckK9|(W|2!H?xfB*=9
z00@8p2!H?xfWYHKfP(Y~#0a$?h=DO-S~My^j=oxdkNE%N)MBs&1V8`;KmY_l;9&$P
zc7pi-!}yR~O?Dt!i2o!0zjc3!u4elT;{T?O`2SYDCfi>S|KExgUCR>NyZFC~_m9UW
zs$=B;Kg}-l%ls`l2LSW`w{Jt_009sH0T2KI5ZHSH@c)Nlp8cf>{~!MU_Pr+CXW;+C
z|KGaLWcv&J|E+07SJT9f$m~Vd@-4Au%+!srHea7#((A|d#^Ra!a&zfKb75ibL}O|G
zON)zVYbt-YV-A&ntN6d#cc13}-{QA0|9|_&Lk<uC0T2KI5C8!X009sH0T2KI5O_=o
z(Ej=b5!I(ZbyV?xl|GQz8?FDp%CExzPl(_I1V8`;KmY_l00ck)1V8`;KmY{xmcWWE
zPOa6?HCXN|wbPbe4aBasR<&&h7Yx^`TA@)jBdPP=FROof@im!PvsW#@+I7X2wAH|K
zs*ddjVYU6<+rG#(SEIN7Md~B}{~o`G`Tu*{v?vV(KmY_l00cl_Bm$WKKMbz)Ow&BW
z{C~{<-@eym`wZs)WB&iveJ0yqF#msRTG7=sF*TomZ|48gC_;`kq>&Sesd@GcJI#N^
zf5y*nlYhiN;5~kt|AYUA|Cr<be+ed>fB*=900@8p2>f3O!2cfxfBKgu{C~XvZ}dI`
z{~!MU*1abCjCs=hfAXluSl}2ElS+$td@6lFyirod=~J?CCd2WO#G|{$nGDsZ@{BPN
z(@&+9=dSY0IRL5a<4lJ9Gm>`ndeM|b*y=elU6h~EN96y{34i}p{(XLfU*X^7r^H-<
z=lE?thcy8SC7gf&2!H?xfB*=900@8p2!H?xJP8EI=PoE8J$j8EQ?l5TPs#R^mz|On
zly9Dr?PqeFAS00;e@aqdu?L@$3GI<jpOO^&BMd2NfyE;PDcQa_`p;M?X|_B&#)=~9
z|2Glwzs=v|-{vkq%wOY0{*ssnP{$ZRLJB7!00JNY0w4eaAOHd&00JNY0-t39<j2Qe
zwR|m}^6cYyy4)&H$>J!zTt2U4_xsr?k6doJr*g*LymY=v7LOIk>G&#FJfa{&^eUGe
zE0CM`DTy2_kO`9XefvxfxzT?{A{&d|4G^8?*);oD#Q#5rr~g^*#;_d(KmY_l00ck)
z1V8`;KmY_l;E5za9={kc$dRX?q-o*+D<@R)?5k|Ee3Uo2tDV*E@)iCsZIi#PUDkfV
zKPi8peOj)FnE|ufX8Di&5<jK2`5kRqyIQ_p{!{r4Em4CL5C8!X009sH0T2KI5C8!X
z0D++hkbhQW`Dk)?vXGmklWdG6$)a(Udo0SGEFAAkqwV3<@^O-iH^M~$<9JEA$|+?7
z_lwfhg7tV|dy<>1h?kI)bdu%i{sG$xEEk2W2g=Ju>15$LY0y}XB&t-wOX(gX-N`a`
zWl1iwVW31|Nu>QgEd@3vOQnVHN^_3x9?&c(MOF{gTUtHPCZc{)D=n{2v{J4CqOk=s
zgs<urwX|%dj8tfi%8oQA6pAcI`UbhHnhxa7NBIWoHOX=+cU)fIanG}7*|oy8!d-1e
zo9DNSM<!1eUnyQJeONkH!v23leR+`y1V8`;KmY_l;D1iwA^ZQyb&j&|{u$Y#(#cDY
zNy8+&Xtl(TFx~KdZ+%#^vwL10QaUW>yk&})E$J;Iv{pTTUAN6qRWh>cu=o*ct(iiB
zZh9^8**02Ms0X2MyQ{&dN=Ya7|HuCS+xMJo-^Tub58D4vJxt;y*eR&$-Rf#?HGT7@
z+4G(+zQdWXbo?!TxGuFmTkX@9(be$+JG4EQ9uM2<Qn6m8lXM*<yE=yL>mA?g*^V8q
z52^8RTx+G7G(Ktj|9P#hW4U1<9@N{mfof^>-aR5#qBf9E`E6CG=1aEjCms8kW7sY=
zh$`j&zNz|=n&hvJ(ft3T>`M79epI_s{<`+6*3<r3zQY&!z5WnEs*mFo1V8`;KmY_l
z00ck)1V8`;K;Y35I4RX}CR{^uGS3B}5e6^R+^!gC^uO8uvMYuIt)SLy%+^9LG}gXm
zt$)kDWYwC>vEVslt!t5JvA(>#!PNWax*b!DQJT6D*5>Q;OM3md-dH?SUv4g)Xf7<w
zomgI8TwY!{Tbq(Ll>0VBj&4(UoyyIuP@SKW2QgX{4RMHJHai-rJl~B*C|;=9CYfmU
zLUFP|V<aWk@omQtgQOR%bumu*4XHs>8bmR=V{}Ai4aW-3iDa!wn(Cr%v8Gv}IyfhD
zL|5yzW>nT}tszcio{ia>EAoolYt8u$_C;C7l=Qd*G2&sh?Vc6r?I4uP>ZvM&pM-$1
zGZ%Eu$jP3FH9Y9X%8%Y<l^?#tDlgsq-Cu7^R5pK~8~5x=o>g9c_s@S|l^@)=DLyxk
zR9=2RH~viMIhC=Q%1>_Q#-|Tf8%wMkIBRXoY*Y%$*Z)pz&`(!h|9R4W!@e(&#-R>~
z5o$jWZ6Q=e4M(1T+;k!xt^a?M-x53j#bx0D0w4eaAOHd&00JNY0w4eaAOHd&u*U?d
kvOKYZATC!O<+gr8CO0e;M_Pfv*i4*X%|%g1b)QQA7g+hN<^TWy

literal 0
HcmV?d00001

diff --git a/crates/milli/.tmpNxMsye/lock.mdb b/crates/milli/.tmpNxMsye/lock.mdb
new file mode 100644
index 0000000000000000000000000000000000000000..abe89541a2a8b5633c06e4203f2fe5fac9ab0262
GIT binary patch
literal 8128
zcmeH@AqoOf7zP(DCPBTzA|_EViNR<Rn-?&MCop;tZy}gO_5d5iym$qR!KR2S{Dvtu
zVP1JWX1*8Zl3kCLO{-Q--ltyPSw1-JcdK*x#Ot%Wb(7@KM-9QFJTGA}L=8dQPui#<
zi2HPk8iKgr&QU`U_q7x?1aY5_P($#w@1TYt?u!O$2;zR-L=8dQ7c<ll#Qm~|8iKgL
zk5NMq_xTU~@neAnEMNf(Sik}nuz&?DU;ztQzycPqfCVgI0Sj2b0v51<1uXD)1)hh7
BeAxg1

literal 0
HcmV?d00001

diff --git a/crates/milli/src/search/new/matches/adjust_indices.rs b/crates/milli/src/search/new/matches/adjust_indices.rs
index 737c40ee3..b7d9ad793 100644
--- a/crates/milli/src/search/new/matches/adjust_indices.rs
+++ b/crates/milli/src/search/new/matches/adjust_indices.rs
@@ -1,152 +1,153 @@
 use std::cmp::Ordering;
 
-use charabia::{SeparatorKind, Token, TokenKind};
+use charabia::{SeparatorKind, Token};
 
-enum SimpleTokenKind {
-    Separator(SeparatorKind),
-    NonSeparator,
-    Done,
+#[derive(Clone)]
+enum Direction {
+    Forwards,
+    Backwards,
 }
 
-impl SimpleTokenKind {
-    fn new(token: &Token) -> Self {
-        match token.kind {
-            TokenKind::Separator(separator_kind) => Self::Separator(separator_kind),
-            _ => Self::NonSeparator,
-        }
-    }
-}
-
-struct CropBoundsHelper<'a> {
-    tokens: &'a [Token<'a>],
-    index_backward: usize,
-    backward_token_kind: SimpleTokenKind,
-    index_forward: usize,
-    forward_token_kind: SimpleTokenKind,
-}
-
-impl CropBoundsHelper<'_> {
-    fn advance_backward(&mut self) {
-        if matches!(self.backward_token_kind, SimpleTokenKind::Done) {
-            return;
-        }
-
-        if self.index_backward != 0 {
-            self.index_backward -= 1;
-            self.backward_token_kind = SimpleTokenKind::new(&self.tokens[self.index_backward]);
-        } else {
-            self.backward_token_kind = SimpleTokenKind::Done;
-        }
-    }
-
-    fn advance_forward(&mut self) {
-        if matches!(self.forward_token_kind, SimpleTokenKind::Done) {
-            return;
-        }
-
-        if self.index_forward != self.tokens.len() - 1 {
-            self.index_forward += 1;
-            self.forward_token_kind = SimpleTokenKind::new(&self.tokens[self.index_forward]);
-        } else {
-            self.forward_token_kind = SimpleTokenKind::Done;
+impl Direction {
+    fn switch(&mut self) {
+        *self = match self {
+            Direction::Backwards => Direction::Forwards,
+            Direction::Forwards => Direction::Backwards,
         }
     }
 }
 
 fn get_adjusted_indices_for_too_few_words(
     tokens: &[Token],
-    index_backward: usize,
-    index_forward: usize,
+    mut index_backward: usize,
+    mut index_forward: usize,
     mut words_count: usize,
     crop_size: usize,
 ) -> [usize; 2] {
-    let crop_size = crop_size + 2;
-    let mut cbh = CropBoundsHelper {
-        tokens,
-        index_backward,
-        backward_token_kind: SimpleTokenKind::new(&tokens[index_backward]),
-        index_forward,
-        forward_token_kind: SimpleTokenKind::new(&tokens[index_forward]),
-    };
+    let mut valid_index_backward = index_backward;
+    let mut valid_index_forward = index_forward;
+
+    let mut is_end_reached = index_forward == tokens.len() - 1;
+    let mut is_beginning_reached = index_backward == 0;
+
+    let mut is_index_backwards_at_hard_separator = false;
+    let mut is_index_forwards_at_hard_separator = false;
+
+    // false + ends reached because TODO
+    let mut is_crop_size_or_both_ends_reached = is_end_reached && is_beginning_reached;
+
+    let mut dir = Direction::Forwards;
 
     loop {
-        match [&cbh.backward_token_kind, &cbh.forward_token_kind] {
-            // if they are both separators and are the same kind then advance both,
-            // or expand in the soft separator side
-            [SimpleTokenKind::Separator(backward_sk), SimpleTokenKind::Separator(forward_sk)] => {
-                if backward_sk == forward_sk {
-                    cbh.advance_backward();
+        if is_crop_size_or_both_ends_reached {
+            break;
+        }
 
-                    // this avoids having an ending separator before crop marker
-                    if words_count < crop_size - 1 {
-                        cbh.advance_forward();
+        let (index, valid_index) = match dir {
+            Direction::Backwards => (&mut index_backward, &mut valid_index_backward),
+            Direction::Forwards => (&mut index_forward, &mut valid_index_forward),
+        };
+
+        loop {
+            match dir {
+                Direction::Forwards => {
+                    if is_end_reached {
+                        break;
                     }
-                } else if matches!(backward_sk, SeparatorKind::Hard) {
-                    cbh.advance_forward();
-                } else {
-                    cbh.advance_backward();
+
+                    *index += 1;
+
+                    is_end_reached = *index == tokens.len() - 1;
                 }
+                Direction::Backwards => {
+                    if is_beginning_reached
+                        || (!is_end_reached
+                            && is_index_backwards_at_hard_separator
+                            && !is_index_forwards_at_hard_separator)
+                    {
+                        break;
+                    }
+
+                    *index -= 1;
+
+                    is_beginning_reached = *index == 0;
+                }
+            };
+
+            if is_end_reached && is_beginning_reached {
+                is_crop_size_or_both_ends_reached = true;
             }
-            // both are words, advance left then right if we haven't reached `crop_size`
-            [SimpleTokenKind::NonSeparator, SimpleTokenKind::NonSeparator] => {
-                cbh.advance_backward();
+
+            let maybe_is_token_hard_separator = tokens[*index]
+                .separator_kind()
+                .map(|sep_kind| matches!(sep_kind, SeparatorKind::Hard));
+
+            // it's not a separator
+            if maybe_is_token_hard_separator.is_none() {
+                *valid_index = *index;
                 words_count += 1;
 
-                if words_count != crop_size {
-                    cbh.advance_forward();
-                    words_count += 1;
+                if words_count == crop_size {
+                    is_crop_size_or_both_ends_reached = true;
                 }
+
+                break;
             }
-            [SimpleTokenKind::Done, SimpleTokenKind::Done] => break,
-            // if one of the tokens is non-separator and the other a separator, we expand in the non-separator side
-            // if one of the sides reached the end, we expand in the opposite direction
-            [backward_stk, SimpleTokenKind::Done]
-            | [backward_stk @ SimpleTokenKind::NonSeparator, SimpleTokenKind::Separator(_)] => {
-                if matches!(backward_stk, SimpleTokenKind::NonSeparator) {
-                    words_count += 1;
-                }
-                cbh.advance_backward();
-            }
-            [SimpleTokenKind::Done, forward_stk]
-            | [SimpleTokenKind::Separator(_), forward_stk @ SimpleTokenKind::NonSeparator] => {
-                if matches!(forward_stk, SimpleTokenKind::NonSeparator) {
-                    words_count += 1;
-                }
-                cbh.advance_forward();
-            }
+
+            let is_index_at_hard_separator = match dir {
+                Direction::Backwards => &mut is_index_backwards_at_hard_separator,
+                Direction::Forwards => &mut is_index_forwards_at_hard_separator,
+            };
+            *is_index_at_hard_separator =
+                maybe_is_token_hard_separator.is_some_and(|is_hard| is_hard);
         }
 
+        dir.switch();
+
+        // 1. if end is reached, we can only advance backwards
+        // 2. if forwards index reached a hard separator and backwards is currently hard, we can go backwards
+    }
+
+    // keep advancing forward to check if there's only separator tokens left until the end
+    // if so, then include those too in the index range
+    let mut try_index_forward = valid_index_forward + 1;
+    while let Some(token) = tokens.get(try_index_forward) {
+        if !token.is_separator() {
+            return [valid_index_backward, valid_index_forward];
+        }
+
+        try_index_forward += 1;
+    }
+
+    [valid_index_backward, try_index_forward - 1]
+}
+
+fn get_adjusted_index_forward_for_too_many_words(
+    tokens: &[Token],
+    index_backward: usize,
+    mut index_forward: usize,
+    mut words_count: usize,
+    crop_size: usize,
+) -> usize {
+    loop {
+        if index_forward == index_backward {
+            return index_forward;
+        }
+
+        index_forward -= 1;
+
+        if tokens[index_forward].is_separator() {
+            continue;
+        }
+
+        words_count -= 1;
+
         if words_count == crop_size {
             break;
         }
     }
 
-    [cbh.index_backward, cbh.index_forward]
-}
-
-fn get_adjusted_index_forward_for_too_many_words(
-    tokens: &[Token],
-    mut index_forward: usize,
-    mut words_count: usize,
-    crop_size: usize,
-) -> usize {
-    while index_forward != 0 {
-        if matches!(SimpleTokenKind::new(&tokens[index_forward]), SimpleTokenKind::NonSeparator) {
-            words_count -= 1;
-
-            if words_count == crop_size {
-                break;
-            }
-        }
-
-        index_forward -= 1;
-    }
-
-    if index_forward == 0 {
-        return index_forward;
-    }
-
-    index_forward - 1
+    index_forward
 }
 
 pub fn get_adjusted_indices_for_highlights_and_crop_size(
@@ -164,14 +165,12 @@ pub fn get_adjusted_indices_for_highlights_and_crop_size(
             words_count,
             crop_size,
         ),
-        Ordering::Equal => [
-            if index_backward != 0 { index_backward - 1 } else { index_backward },
-            if index_forward != tokens.len() - 1 { index_forward + 1 } else { index_forward },
-        ],
+        Ordering::Equal => [index_backward, index_forward],
         Ordering::Greater => [
             index_backward,
             get_adjusted_index_forward_for_too_many_words(
                 tokens,
+                index_backward,
                 index_forward,
                 words_count,
                 crop_size,
@@ -185,7 +184,7 @@ pub fn get_adjusted_index_forward_for_crop_size(tokens: &[Token], crop_size: usi
     let mut index = 0;
 
     while index != tokens.len() - 1 {
-        if matches!(SimpleTokenKind::new(&tokens[index]), SimpleTokenKind::NonSeparator) {
+        if !tokens[index].is_separator() {
             words_count += 1;
 
             if words_count == crop_size {
diff --git a/crates/milli/src/search/new/matches/match_bounds.rs b/crates/milli/src/search/new/matches/match_bounds.rs
index 5e6c0d7d7..44f88b648 100644
--- a/crates/milli/src/search/new/matches/match_bounds.rs
+++ b/crates/milli/src/search/new/matches/match_bounds.rs
@@ -14,6 +14,7 @@ use utoipa::ToSchema;
 
 use super::FormatOptions;
 
+// TODO: Differentiate if full match do not return None, instead return match bounds with full length
 #[derive(Serialize, ToSchema)]
 #[serde(rename_all = "camelCase")]
 pub struct MatchBounds {
@@ -158,20 +159,28 @@ impl MatchBoundsHelper<'_> {
     }
 
     /// For crop but no highlight.
-    fn get_crop_bounds_with_no_matches(&self, crop_size: usize) -> Option<MatchBounds> {
+    fn get_crop_bounds_with_no_matches(&self, crop_size: usize) -> MatchBounds {
         let final_token_index = get_adjusted_index_forward_for_crop_size(self.tokens, crop_size);
         let final_token = &self.tokens[final_token_index];
 
-        if final_token_index == self.tokens.len() - 1 {
-            return None;
-        }
-
         // TODO: Why is it that when we match all of the tokens we need to get byte_end instead of start?
 
-        Some(MatchBounds { highlight_toggle: false, indices: vec![0, final_token.byte_start] })
+        // TODO: Can here be an error, because it's byte_start but it could be byte_end?
+        MatchBounds { highlight_toggle: false, indices: vec![0, final_token.byte_start] }
     }
 
     fn get_matches_and_crop_indices(&self, crop_size: usize) -> MatchesAndCropIndices {
+        let asd = |i1, i2| {
+            println!(
+                "{}|{}|{}\n{} {}",
+                self.tokens[..i1].iter().map(|v| v.lemma()).collect::<Vec<_>>().join(""),
+                self.tokens[i1..i2].iter().map(|v| v.lemma()).collect::<Vec<_>>().join(""),
+                self.tokens[i2..].iter().map(|v| v.lemma()).collect::<Vec<_>>().join(""),
+                i1,
+                i2
+            );
+        };
+
         // TODO: This doesn't give back 2 phrases if one is out of crop window
         // Solution: also get next and previous matches, and if they're in the crop window, even if partially, highlight them
         let [matches_first_index, matches_last_index] =
@@ -196,28 +205,17 @@ impl MatchBoundsHelper<'_> {
             crop_size,
         );
 
-        let is_index_backward_at_limit = index_backward == 0;
-        let is_index_forward_at_limit = index_forward == self.tokens.len() - 1;
+        asd(first_match.get_first_token_pos(), last_match.get_last_token_pos());
+        asd(index_backward, index_forward);
 
         let backward_token = &self.tokens[index_backward];
-        let crop_byte_start = if is_index_backward_at_limit {
-            backward_token.byte_start
-        } else {
-            backward_token.byte_end
-        };
-
         let forward_token = &self.tokens[index_forward];
-        let crop_byte_end = if is_index_forward_at_limit {
-            forward_token.byte_end
-        } else {
-            forward_token.byte_start
-        };
 
         MatchesAndCropIndices {
             matches_first_index,
             matches_last_index,
-            crop_byte_start,
-            crop_byte_end,
+            crop_byte_start: backward_token.byte_start,
+            crop_byte_end: forward_token.byte_end,
         }
     }
 
@@ -248,7 +246,7 @@ impl MatchBounds {
 
         if let Some(crop_size) = format_options.crop.filter(|v| *v != 0) {
             if matches.is_empty() {
-                return mbh.get_crop_bounds_with_no_matches(crop_size);
+                return Some(mbh.get_crop_bounds_with_no_matches(crop_size));
             }
 
             if format_options.highlight {
@@ -258,15 +256,15 @@ impl MatchBounds {
             return Some(mbh.get_crop_bounds_with_matches(crop_size));
         }
 
-        if format_options.highlight && !matches.is_empty() {
-            Some(mbh.get_match_bounds(MatchesAndCropIndices {
-                matches_first_index: 0,
-                matches_last_index: matches.len() - 1,
-                crop_byte_start: 0,
-                crop_byte_end: tokens[tokens.len() - 1].byte_end,
-            }))
-        } else {
-            None
+        if !format_options.highlight || matches.is_empty() {
+            return None;
         }
+
+        Some(mbh.get_match_bounds(MatchesAndCropIndices {
+            matches_first_index: 0,
+            matches_last_index: matches.len() - 1,
+            crop_byte_start: 0,
+            crop_byte_end: tokens[tokens.len() - 1].byte_end,
+        }))
     }
 }
diff --git a/crates/milli/src/search/new/matches/matching_words.rs b/crates/milli/src/search/new/matches/matching_words.rs
index fe0aac40e..ab7f90f05 100644
--- a/crates/milli/src/search/new/matches/matching_words.rs
+++ b/crates/milli/src/search/new/matches/matching_words.rs
@@ -115,17 +115,21 @@ impl MatchingWords {
 
             let position = [*positions.start(), *positions.end()];
 
-            located_matching_phrases.reserve(matching_phrases.len());
-            located_matching_phrases.extend(matching_phrases.iter().map(|matching_phrase| {
-                LocatedMatchingPhrase { value: *matching_phrase, position }
-            }));
+            if !matching_phrases.is_empty() {
+                located_matching_phrases.reserve(matching_phrases.len());
+                located_matching_phrases.extend(matching_phrases.iter().map(|matching_phrase| {
+                    LocatedMatchingPhrase { value: *matching_phrase, position }
+                }));
+            }
 
-            located_matching_words.push(LocatedMatchingWords {
-                value: matching_words,
-                position,
-                is_prefix: term.is_prefix(),
-                original_char_count: term.original_word(&ctx).chars().count(),
-            });
+            if !matching_words.is_empty() {
+                located_matching_words.push(LocatedMatchingWords {
+                    value: matching_words,
+                    position,
+                    is_prefix: term.is_prefix(),
+                    original_char_count: term.original_word(&ctx).chars().count(),
+                });
+            }
         }
 
         // Sort words by having `is_prefix` as false first and then by their lengths in reverse order.
@@ -147,12 +151,11 @@ impl MatchingWords {
         token_position_helper_iter: &mut (impl Iterator<Item = TokenPositionHelper<'a>> + Clone),
     ) -> Option<(Match, UserQueryPositionRange)> {
         let mut mapped_phrase_iter = self.located_matching_phrases.iter().map(|lmp| {
-            let words_iter = self
-                .phrase_interner
-                .get(lmp.value)
-                .words
+            let words = &self.phrase_interner.get(lmp.value).words;
+
+            let words_iter = words
                 .iter()
-                .map(|word_option| word_option.map(|word| self.word_interner.get(word).as_str()))
+                .map(|maybe_word| maybe_word.map(|word| self.word_interner.get(word).as_str()))
                 .peekable();
 
             (lmp.position, words_iter)
@@ -161,7 +164,7 @@ impl MatchingWords {
         'outer: loop {
             let (query_position_range, mut words_iter) = mapped_phrase_iter.next()?;
 
-            // TODO: Is it worth only cloning if we have to?
+            // TODO: if it's worth it, clone only if we have to
             let mut tph_iter = token_position_helper_iter.clone();
 
             let mut first_tph_details = None;
@@ -241,46 +244,50 @@ impl MatchingWords {
         tph: TokenPositionHelper,
         text: &str,
     ) -> Option<(Match, UserQueryPositionRange)> {
-        let mut iter =
-            self.located_matching_words.iter().flat_map(|lw| lw.value.iter().map(move |w| (lw, w)));
+        // TODO: There is potentially an optimization to be made here
+        // if we matched a term then we can skip checking it for further iterations?
 
-        loop {
-            let (located_words, word) = iter.next()?;
-            let word = self.word_interner.get(*word);
+        self.located_matching_words
+            .iter()
+            .flat_map(|lw| lw.value.iter().map(move |w| (lw, w)))
+            .find_map(|(located_words, word)| {
+                let word = self.word_interner.get(*word);
 
-            let [char_count, byte_len] =
-                match PrefixedOrEquality::new(tph.token.lemma(), word, located_words.is_prefix) {
-                    PrefixedOrEquality::Prefixed => {
-                        let prefix_byte_len = text[tph.token.byte_start..]
-                            .char_indices()
-                            .nth(located_words.original_char_count - 1)
-                            .map(|(i, c)| i + c.len_utf8())
-                            .expect("expected text to have n-th thing bal bla TODO");
+                let [char_count, byte_len] =
+                    match PrefixedOrEquality::new(tph.token.lemma(), word, located_words.is_prefix)
+                    {
+                        PrefixedOrEquality::Prefixed => {
+                            let prefix_byte_len = text[tph.token.byte_start..]
+                                .char_indices()
+                                .nth(located_words.original_char_count - 1)
+                                .map(|(i, c)| i + c.len_utf8())
+                                .expect("expected text to have n-th thing bal bla TODO");
 
-                        // TODO: Investigate token original byte length and similar methods and why they're not good enough
+                            // TODO: Investigate token original byte length and similar methods and why they're not good enough
+                            //       That might be because token original byte length only or could also refer to the normalized byte length
 
-                        [located_words.original_char_count, prefix_byte_len]
-                    }
-                    // do not +1, because Token index ranges are exclusive
-                    PrefixedOrEquality::Equality => [
-                        tph.token.char_end - tph.token.char_start,
-                        tph.token.byte_end - tph.token.byte_start,
-                    ],
-                    _ => continue,
-                };
+                            [located_words.original_char_count, prefix_byte_len]
+                        }
+                        // do not +1, because Token index ranges are exclusive
+                        PrefixedOrEquality::Equality => [
+                            tph.token.char_end - tph.token.char_start,
+                            tph.token.byte_end - tph.token.byte_start,
+                        ],
+                        _ => return None,
+                    };
 
-            return Some((
-                Match {
-                    char_count,
-                    byte_len,
-                    position: MatchPosition::Word {
-                        word_position: tph.position_by_word,
-                        token_position: tph.position_by_token,
+                Some((
+                    Match {
+                        char_count,
+                        byte_len,
+                        position: MatchPosition::Word {
+                            word_position: tph.position_by_word,
+                            token_position: tph.position_by_token,
+                        },
                     },
-                },
-                located_words.position,
-            ));
-        }
+                    located_words.position,
+                ))
+            })
     }
 
     pub fn get_matches_and_query_positions(
@@ -361,93 +368,93 @@ impl Debug for MatchingWords {
     }
 }
 
-#[cfg(test)]
-pub(crate) mod tests {
-    use super::super::super::located_query_terms_from_tokens;
-    use super::*;
-    use crate::search::new::matches::tests::temp_index_with_documents;
-    use crate::search::new::query_term::ExtractedTokens;
-    use charabia::{TokenKind, TokenizerBuilder};
-    use std::borrow::Cow;
+// #[cfg(test)]
+// pub(crate) mod tests {
+//     use super::super::super::located_query_terms_from_tokens;
+//     use super::*;
+//     use crate::search::new::matches::tests::temp_index_with_documents;
+//     use crate::search::new::query_term::ExtractedTokens;
+//     use charabia::{TokenKind, TokenizerBuilder};
+//     use std::borrow::Cow;
 
-    #[test]
-    fn matching_words() {
-        let temp_index = temp_index_with_documents(None);
-        let rtxn = temp_index.read_txn().unwrap();
-        let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap();
-        let mut builder = TokenizerBuilder::default();
-        let tokenizer = builder.build();
-        let text = "split this world";
-        let tokens = tokenizer.tokenize(text);
-        let ExtractedTokens { query_terms, .. } =
-            located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap();
-        let matching_words = MatchingWords::new(ctx, &query_terms);
+//     #[test]
+//     fn matching_words() {
+//         let temp_index = temp_index_with_documents(None);
+//         let rtxn = temp_index.read_txn().unwrap();
+//         let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap();
+//         let mut builder = TokenizerBuilder::default();
+//         let tokenizer = builder.build();
+//         let text = "split this world";
+//         let tokens = tokenizer.tokenize(text);
+//         let ExtractedTokens { query_terms, .. } =
+//             located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap();
+//         let matching_words = MatchingWords::new(ctx, &query_terms);
 
-        assert_eq!(
-            matching_words.get_matches_and_query_positions(
-                &[
-                    Token {
-                        kind: TokenKind::Word,
-                        lemma: Cow::Borrowed("split"),
-                        char_end: "split".chars().count(),
-                        byte_end: "split".len(),
-                        ..Default::default()
-                    },
-                    Token {
-                        kind: TokenKind::Word,
-                        lemma: Cow::Borrowed("nyc"),
-                        char_end: "nyc".chars().count(),
-                        byte_end: "nyc".len(),
-                        ..Default::default()
-                    },
-                    Token {
-                        kind: TokenKind::Word,
-                        lemma: Cow::Borrowed("world"),
-                        char_end: "world".chars().count(),
-                        byte_end: "world".len(),
-                        ..Default::default()
-                    },
-                    Token {
-                        kind: TokenKind::Word,
-                        lemma: Cow::Borrowed("worlded"),
-                        char_end: "worlded".chars().count(),
-                        byte_end: "worlded".len(),
-                        ..Default::default()
-                    },
-                    Token {
-                        kind: TokenKind::Word,
-                        lemma: Cow::Borrowed("thisnew"),
-                        char_end: "thisnew".chars().count(),
-                        byte_end: "thisnew".len(),
-                        ..Default::default()
-                    }
-                ],
-                text
-            ),
-            (
-                vec![
-                    Match {
-                        char_count: 5,
-                        byte_len: 5,
-                        position: MatchPosition::Word { word_position: 0, token_position: 0 }
-                    },
-                    Match {
-                        char_count: 5,
-                        byte_len: 5,
-                        position: MatchPosition::Word { word_position: 2, token_position: 2 }
-                    },
-                    Match {
-                        char_count: 5,
-                        byte_len: 5,
-                        position: MatchPosition::Word { word_position: 3, token_position: 3 }
-                    }
-                ],
-                vec![
-                    QueryPosition { range: [0, 0], index: 0 },
-                    QueryPosition { range: [2, 2], index: 1 },
-                    QueryPosition { range: [2, 2], index: 2 }
-                ]
-            )
-        );
-    }
-}
+//         assert_eq!(
+//             matching_words.get_matches_and_query_positions(
+//                 &[
+//                     Token {
+//                         kind: TokenKind::Word,
+//                         lemma: Cow::Borrowed("split"),
+//                         char_end: "split".chars().count(),
+//                         byte_end: "split".len(),
+//                         ..Default::default()
+//                     },
+//                     Token {
+//                         kind: TokenKind::Word,
+//                         lemma: Cow::Borrowed("nyc"),
+//                         char_end: "nyc".chars().count(),
+//                         byte_end: "nyc".len(),
+//                         ..Default::default()
+//                     },
+//                     Token {
+//                         kind: TokenKind::Word,
+//                         lemma: Cow::Borrowed("world"),
+//                         char_end: "world".chars().count(),
+//                         byte_end: "world".len(),
+//                         ..Default::default()
+//                     },
+//                     Token {
+//                         kind: TokenKind::Word,
+//                         lemma: Cow::Borrowed("worlded"),
+//                         char_end: "worlded".chars().count(),
+//                         byte_end: "worlded".len(),
+//                         ..Default::default()
+//                     },
+//                     Token {
+//                         kind: TokenKind::Word,
+//                         lemma: Cow::Borrowed("thisnew"),
+//                         char_end: "thisnew".chars().count(),
+//                         byte_end: "thisnew".len(),
+//                         ..Default::default()
+//                     }
+//                 ],
+//                 text
+//             ),
+//             (
+//                 vec![
+//                     Match {
+//                         char_count: 5,
+//                         byte_len: 5,
+//                         position: MatchPosition::Word { word_position: 0, token_position: 0 }
+//                     },
+//                     Match {
+//                         char_count: 5,
+//                         byte_len: 5,
+//                         position: MatchPosition::Word { word_position: 2, token_position: 2 }
+//                     },
+//                     Match {
+//                         char_count: 5,
+//                         byte_len: 5,
+//                         position: MatchPosition::Word { word_position: 3, token_position: 3 }
+//                     }
+//                 ],
+//                 vec![
+//                     QueryPosition { range: [0, 0], index: 0 },
+//                     QueryPosition { range: [2, 2], index: 1 },
+//                     QueryPosition { range: [2, 2], index: 2 }
+//                 ]
+//             )
+//         );
+//     }
+// }
diff --git a/crates/milli/src/search/new/matches/mod.rs b/crates/milli/src/search/new/matches/mod.rs
index 0eab29d93..bab82da8c 100644
--- a/crates/milli/src/search/new/matches/mod.rs
+++ b/crates/milli/src/search/new/matches/mod.rs
@@ -48,10 +48,9 @@ impl<'a> MatcherBuilder<'a> {
     }
 }
 
-#[derive(Copy, Clone, Default, Debug)]
+#[derive(Copy, Clone, Default)]
 pub struct FormatOptions {
     pub highlight: bool,
-    // TODO: Should this be usize?
     pub crop: Option<usize>,
 }
 
@@ -80,7 +79,9 @@ impl Matcher<'_, '_, '_, '_> {
     /// TODO: description
     pub fn get_match_bounds(
         &mut self,
-        // TODO: Add option to count grapheme clusters instead of bytes
+        // TODO: Add option to count UTF-16 segments, or whatever JS works with when slicing strings
+        // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String#utf-16_characters_unicode_code_points_and_grapheme_clusters
+        // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/slice
         format_options: Option<FormatOptions>,
     ) -> Option<MatchBounds> {
         if self.text.is_empty() {
@@ -152,7 +153,6 @@ mod tests {
     use crate::index::tests::TempIndex;
     use crate::{execute_search, filtered_universe, SearchContext, TimeBudget};
     use charabia::TokenizerBuilder;
-    use memmap2::Mmap;
 
     impl<'a> MatcherBuilder<'a> {
         fn new_test(rtxn: &'a heed::RoTxn<'a>, index: &'a TempIndex, query: &str) -> Self {
@@ -180,10 +180,9 @@ mod tests {
             .unwrap();
 
             // consume context and located_query_terms to build MatchingWords.
-            let matching_words = match located_query_terms {
-                Some(located_query_terms) => MatchingWords::new(ctx, &located_query_terms),
-                None => MatchingWords::default(),
-            };
+            let matching_words = located_query_terms
+                .map(|located_query_terms| MatchingWords::new(ctx, &located_query_terms))
+                .unwrap_or_default();
 
             MatcherBuilder::new(
                 matching_words,
@@ -197,283 +196,401 @@ mod tests {
         }
     }
 
-    pub fn temp_index_with_documents(documents: Option<Mmap>) -> TempIndex {
+    pub fn rename_me(
+        format_options: Option<FormatOptions>,
+        text: &str,
+        query: &str,
+        expected_text: &str,
+    ) {
         let temp_index = TempIndex::new();
 
+        // document will always contain the same exact text normally
+        // TODO: Describe this better and ask if this is actually the case
         temp_index
-            .add_documents(documents.unwrap_or_else(|| {
-                documents!([
-                    { "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
-                    { "id": 2, "name": "Westfália" },
-                    { "id": 3, "name": "Ŵôřlḑôle" },
-                ])
-            }))
+            .add_documents(documents!([
+                { "id": 1, "text": text.to_string() },
+            ]))
             .unwrap();
 
-        temp_index
-    }
-
-    fn get_expected_maybe_text(expected_text: &str, text: &str) -> Option<String> {
-        if expected_text == text {
-            None
-        } else {
-            Some(expected_text.to_string())
-        }
-    }
-
-    #[test]
-    fn format_identity() {
-        let temp_index = temp_index_with_documents(None);
         let rtxn = temp_index.read_txn().unwrap();
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
-        let format_options = Some(FormatOptions { highlight: false, crop: None });
-
-        let test_values = [
-            // Text without any match.
-            "A quick brown fox can not jump 32 feet, right? Brr, it is cold!", 
-            // Text containing all matches.
-            "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.", 
-            // Text containing some matches.
-            "Natalie risk her future to build a world with the boy she loves."
-        ];
-
-        for text in test_values {
-            let mut matcher = builder.build(text, None);
-            // no crop and no highlight should return complete text.
-            assert_eq!(matcher.get_formatted_text(format_options), None);
-        }
-    }
-
-    #[test]
-    fn format_highlight() {
-        let temp_index = temp_index_with_documents(None);
-        let rtxn = temp_index.read_txn().unwrap();
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
-        let format_options = Some(FormatOptions { highlight: true, crop: None });
-
-        let test_values = [
-            // empty text.
-            ["", ""],
-            // text containing only separators.
-            [":-)", ":-)"],
-            // Text without any match.
-            ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
-             "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"],
-            // Text containing all matches.
-            ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.", 
-             "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."],
-            // Text containing some matches.
-            ["Natalie risk her future to build a world with the boy she loves.",
-             "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves."],
-        ];
-
-        for [text, expected_text] in test_values {
-            let mut matcher = builder.build(text, None);
-            // no crop should return complete text with highlighted matches.
-            assert_eq!(
-                matcher.get_formatted_text(format_options),
-                get_expected_maybe_text(expected_text, text)
-            );
-        }
-    }
-
-    #[test]
-    fn highlight_unicode() {
-        let temp_index = temp_index_with_documents(None);
-        let rtxn = temp_index.read_txn().unwrap();
-        let format_options = Some(FormatOptions { highlight: true, crop: None });
-
-        let test_values = [
-            // Text containing prefix match.
-            ["world", "Ŵôřlḑôle", "<em>Ŵôřlḑ</em>ôle"],
-            // Text containing unicode match.
-            ["world", "Ŵôřlḑ", "<em>Ŵôřlḑ</em>"],
-            // Text containing unicode match.
-            ["westfali", "Westfália", "<em>Westfáli</em>a"],
-        ];
-
-        for [query, text, expected_text] in test_values {
-            let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query);
-            let mut matcher = builder.build(text, None);
-            // no crop should return complete text with highlighted matches.
-            assert_eq!(
-                matcher.get_formatted_text(format_options),
-                get_expected_maybe_text(expected_text, text)
-            );
-        }
-    }
-
-    #[test]
-    fn format_crop() {
-        let temp_index = temp_index_with_documents(None);
-        let rtxn = temp_index.read_txn().unwrap();
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
-        let format_options = Some(FormatOptions { highlight: false, crop: Some(10) });
-
-        let test_values = [
-            // empty text.
-            ["", ""],
-            // text containing only separators.
-            [":-)", ":-)"],
-            // Text without any match.
-            ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
-             "A quick brown fox can not jump 32 feet, right…"],
-            // Text without any match starting by a separator.
-            ["(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)",
-             "(A quick brown fox can not jump 32 feet, right…" ],
-            // Test phrase propagation
-            ["Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.",
-             "…Split The World is a book written by Emily Henry…"],
-            // Text containing some matches.
-            ["Natalie risk her future to build a world with the boy she loves.",
-             "…future to build a world with the boy she loves."],
-            // Text containing all matches.
-            ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
-             "…she loves. Emily Henry: The Love That Split The World."],
-            // Text containing a match unordered and a match ordered.
-            ["The world split void void void void void void void void void split the world void void",
-             "…void void void void void split the world void void"],
-            // Text containing matches with different density.
-            ["split void the void void world void void void void void void void void void void split the world void void",
-             "…void void void void void split the world void void"],
-            ["split split split split split split void void void void void void void void void void split the world void void",
-             "…void void void void void split the world void void"]
-        ];
-
-        for [text, expected_text] in test_values {
-            let mut matcher = builder.build(text, None);
-            // no crop should return complete text with highlighted matches.
-            assert_eq!(
-                matcher.get_formatted_text(format_options),
-                get_expected_maybe_text(expected_text, text)
-            );
-        }
-    }
-
-    #[test]
-    fn format_highlight_crop() {
-        let temp_index = temp_index_with_documents(None);
-        let rtxn = temp_index.read_txn().unwrap();
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
-        let format_options = Some(FormatOptions { highlight: true, crop: Some(10) });
-
-        let test_values = [
-            // empty text.
-            ["", ""],
-            // text containing only separators.
-            [":-)", ":-)"],
-            // Text without any match.
-            ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!", 
-             "A quick brown fox can not jump 32 feet, right…"],
-            // Text containing some matches.
-            ["Natalie risk her future to build a world with the boy she loves.", 
-             "…future to build a <em>world</em> with <em>the</em> boy she loves."],
-            // Text containing all matches.
-            ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.", 
-             "…she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."],
-            // Text containing a match unordered and a match ordered.
-            ["The world split void void void void void void void void void split the world void void", 
-             "…void void void void void <em>split</em> <em>the</em> <em>world</em> void void"]
-        ];
-
-        for [text, expected_text] in test_values {
-            let mut matcher = builder.build(text, None);
-            // no crop should return complete text with highlighted matches.
-            assert_eq!(
-                matcher.get_formatted_text(format_options),
-                get_expected_maybe_text(expected_text, text)
-            );
-        }
-    }
-
-    #[test]
-    fn format_highlight_crop_phrase_query() {
-        //! testing: https://github.com/meilisearch/meilisearch/issues/3975
-        let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
-        let temp_index = temp_index_with_documents(Some(documents!([
-            { "id": 1, "text": text }
-        ])));
-        let rtxn = temp_index.read_txn().unwrap();
-
-        let format_options = Some(FormatOptions { highlight: true, crop: Some(10) });
-
-        let test_values = [
-            // should return 10 words with a marker at the start as well the end, and the highlighted matches.
-            ["\"the world\"",
-             "…the power to split <em>the world</em> between those who embraced…"],
-            // should highlight "those" and the phrase "and those".
-            ["those \"and those\"", 
-             "…world between <em>those</em> who embraced progress <em>and those</em> who resisted…"],
-            ["\"The groundbreaking invention had the power to split the world\"", 
-             "<em>The groundbreaking invention had the power to split the world</em>…"],
-            ["\"The groundbreaking invention had the power to split the world between those\"", 
-             "<em>The groundbreaking invention had the power to split the world</em>…"],
-            ["\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"", 
-             "…between those who <em>embraced progress and those who resisted change</em>!"],
-            ["\"groundbreaking invention\" \"split the world between\"",
-             "…<em>groundbreaking invention</em> had the power to <em>split the world between</em>…"],
-            ["\"groundbreaking invention\" \"had the power to split the world between those\"", 
-             "…<em>invention</em> <em>had the power to split the world between those</em>…"],
-        ];
-
-        for [query, expected_text] in test_values {
-            let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query);
-            let mut matcher = builder.build(text, None);
-
-            assert_eq!(
-                matcher.get_formatted_text(format_options),
-                get_expected_maybe_text(expected_text, text)
-            );
-        }
-    }
-
-    #[test]
-    fn smaller_crop_size() {
-        //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
-        let temp_index = temp_index_with_documents(None);
-        let rtxn = temp_index.read_txn().unwrap();
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
-        let text = "void void split the world void void.";
+        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query);
         let mut matcher = builder.build(text, None);
 
-        let test_values = [
-            // set a smaller crop size
-            // because crop size < query size, partially format matches.
-            (2, "…split the…"),
-            // set a smaller crop size
-            // because crop size < query size, partially format matches.
-            (1, "…split…"),
-            // set  crop size to 0
-            // because crop size is 0, crop is ignored.
-            (0, "void void split the world void void."),
-        ];
-
-        for (crop_size, expected_text) in test_values {
-            // set a smaller crop size
-            let format_options = Some(FormatOptions { highlight: false, crop: Some(crop_size) });
-            assert_eq!(
-                matcher.get_formatted_text(format_options),
-                get_expected_maybe_text(expected_text, text)
-            );
-        }
+        assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
     }
 
-    #[test]
-    fn partial_matches() {
-        let temp_index = temp_index_with_documents(None);
-        let rtxn = temp_index.read_txn().unwrap();
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "the \"t he\" door \"do or\"");
-
-        let format_options = Some(FormatOptions { highlight: true, crop: None });
-
-        let text = "the do or die can't be he do and or isn't he";
-        let mut matcher = builder.build(text, None);
-        assert_eq!(
-            matcher.get_formatted_text(format_options),
-            Some(
-                "<em>the</em> <em>do or</em> die can't be he do and or isn'<em>t he</em>"
-                    .to_string()
-            )
+    /// "Dei store fiskane eta dei små — dei liger under som minst förmå."
+    ///
+    /// (Men are like fish; the great ones devour the small.)
+    fn rename_me_with_base_text(
+        format_options: Option<FormatOptions>,
+        query: &str,
+        expected_text: &str,
+    ) {
+        rename_me(
+            format_options,
+            "Dei store fiskane eta dei små — dei liger under som minst förmå.",
+            query,
+            expected_text,
         );
     }
+
+    #[test]
+    fn phrase_highlight_bigger_than_crop() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: true, crop: Some(1) }),
+            "\"dei liger\"",
+            "…<em>dei</em>…",
+        );
+    }
+
+    #[test]
+    fn phrase_highlight_same_size_as_crop() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: true, crop: Some(2) }),
+            "\"dei liger\"",
+            "…<em>dei liger</em>…",
+        );
+    }
+
+    #[test]
+    fn phrase_highlight_crop_middle() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: true, crop: Some(4) }),
+            "\"dei liger\"",
+            "…små — <em>dei liger</em> under…",
+        );
+    }
+
+    #[test]
+    fn phrase_highlight_crop_end() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: true, crop: Some(4) }),
+            "\"minst förmå\"",
+            "…under som <em>minst förmå</em>.",
+        );
+    }
+
+    #[test]
+    fn phrase_highlight_crop_beginning() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: true, crop: Some(4) }),
+            "\"Dei store\"",
+            "<em>Dei store</em> fiskane eta…",
+        );
+    }
+
+    #[test]
+    fn highlight_end() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: true, crop: None }),
+            "minst förmå",
+            "Dei store fiskane eta dei små — dei liger under som <em>minst</em> <em>förmå</em>.",
+        );
+    }
+
+    #[test]
+    fn highlight_beginning_and_middle() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: true, crop: None }),
+            "Dei store",
+            "<em>Dei</em> <em>store</em> fiskane eta <em>dei</em> små — <em>dei</em> liger under som minst förmå.",
+        );
+    }
+
+    #[test]
+    fn partial_match_middle() {
+        // TODO: Is this intentional?
+        // Here the only interned word is "forma", hence it cannot find the searched prefix
+        // word "fo" inside "forma" within milli::search::new::matches::matching_words::MatchingWords::try_get_word_match
+        // `milli::search::new::query_term::QueryTerm::all_computed_derivations` might be at fault here
+
+        // interned words = ["forma"]
+        rename_me(
+            Some(FormatOptions { highlight: true, crop: None }),
+            "altså, förmå, på en måte",
+            "fo",
+            "altså, <em>förmå</em>, på en måte",
+        );
+
+        // interned words = ["fo", "forma"]
+        rename_me(
+            Some(FormatOptions { highlight: true, crop: None }),
+            "altså, fo förmå, på en måte",
+            "fo",
+            "altså, <em>fo</em> <em>fö</em>rmå, på en måte",
+        );
+    }
+
+    #[test]
+    fn partial_match_end() {
+        rename_me(
+            Some(FormatOptions { highlight: true, crop: None }),
+            "förmå, på en måte",
+            "fo",
+            "<em>förmå</em>, på en måte",
+        );
+
+        rename_me(
+            Some(FormatOptions { highlight: true, crop: None }),
+            "fo förmå, på en måte",
+            "fo",
+            "<em>fo</em> <em>fö</em>rmå, på en måte",
+        );
+    }
+
+    #[test]
+    fn partial_match_beginning() {
+        rename_me(
+            Some(FormatOptions { highlight: true, crop: None }),
+            "altså, förmå",
+            "fo",
+            "altså, <em>förmå</em>",
+        );
+
+        rename_me(
+            Some(FormatOptions { highlight: true, crop: None }),
+            "altså, fo förmå",
+            "fo",
+            "altså, <em>fo</em> <em>fö</em>rmå",
+        );
+    }
+
+    // #[test]
+    // fn format_identity() {
+    //     let temp_index = temp_index_with_documents(None);
+    //     let rtxn = temp_index.read_txn().unwrap();
+    //     let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
+    //     let format_options = Some(FormatOptions { highlight: false, crop: None });
+
+    //     let test_values = [
+    //         // Text without any match.
+    //         "A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
+    //         // Text containing all matches.
+    //         "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
+    //         // Text containing some matches.
+    //         "Natalie risk her future to build a world with the boy she loves."
+    //     ];
+
+    //     for text in test_values {
+    //         let mut matcher = builder.build(text, None);
+    //         // no crop and no highlight should return complete text.
+    //         assert_eq!(matcher.get_formatted_text(format_options), None);
+    //     }
+    // }
+
+    // #[test]
+    // fn format_highlight() {
+    //     let temp_index = temp_index_with_documents(None);
+    //     let rtxn = temp_index.read_txn().unwrap();
+    //     let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
+    //     let format_options = Some(FormatOptions { highlight: true, crop: None });
+
+    //     let test_values = [
+    //         // empty text.
+    //         ["", ""],
+    //         // text containing only separators.
+    //         [":-)", ":-)"],
+    //         // Text without any match.
+    //         ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
+    //          "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"],
+    //         // Text containing all matches.
+    //         ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
+    //          "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."],
+    //         // Text containing some matches.
+    //         ["Natalie risk her future to build a world with the boy she loves.",
+    //          "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves."],
+    //     ];
+
+    //     for [text, expected_text] in test_values {
+    //         let mut matcher = builder.build(text, None);
+    //         // no crop should return complete text with highlighted matches.
+    //         assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
+    //     }
+    // }
+
+    // #[test]
+    // fn highlight_unicode() {
+    //     let temp_index = temp_index_with_documents(None);
+    //     let rtxn = temp_index.read_txn().unwrap();
+    //     let format_options = Some(FormatOptions { highlight: true, crop: None });
+
+    //     let test_values = [
+    //         // Text containing prefix match.
+    //         ["world", "Ŵôřlḑôle", "<em>Ŵôřlḑ</em>ôle"],
+    //         // Text containing unicode match.
+    //         ["world", "Ŵôřlḑ", "<em>Ŵôřlḑ</em>"],
+    //         // Text containing unicode match.
+    //         ["westfali", "Westfália", "<em>Westfáli</em>a"],
+    //     ];
+
+    //     for [query, text, expected_text] in test_values {
+    //         let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query);
+    //         let mut matcher = builder.build(text, None);
+    //         // no crop should return complete text with highlighted matches.
+    //         assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
+    //     }
+    // }
+
+    // #[test]
+    // fn format_crop() {
+    //     let temp_index = temp_index_with_documents(None);
+    //     let rtxn = temp_index.read_txn().unwrap();
+    //     let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
+    //     let format_options = Some(FormatOptions { highlight: false, crop: Some(10) });
+
+    //     let test_values = [
+    //         // empty text.
+    //         // ["", ""],
+    //         // text containing only separators.
+    //         // [":-)", ":-)"],
+    //         // Text without any match.
+    //         ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
+    //          "A quick brown fox can not jump 32 feet, right…"],
+    //         // Text without any match starting by a separator.
+    //         ["(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)",
+    //          "(A quick brown fox can not jump 32 feet, right…" ],
+    //         // Test phrase propagation
+    //         ["Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.",
+    //          "…Split The World is a book written by Emily Henry…"],
+    //         // Text containing some matches.
+    //         ["Natalie risk her future to build a world with the boy she loves.",
+    //          "…future to build a world with the boy she loves."],
+    //         // Text containing all matches.
+    //         ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
+    //          "…she loves. Emily Henry: The Love That Split The World."],
+    //         // Text containing a match unordered and a match ordered.
+    //         ["The world split void void void void void void void void void split the world void void",
+    //          "…void void void void void split the world void void"],
+    //         // Text containing matches with different density.
+    //         ["split void the void void world void void void void void void void void void void split the world void void",
+    //          "…void void void void void split the world void void"],
+    //         ["split split split split split split void void void void void void void void void void split the world void void",
+    //          "…void void void void void split the world void void"]
+    //     ];
+
+    //     for [text, expected_text] in test_values {
+    //         let mut matcher = builder.build(text, None);
+    //         // no crop should return complete text with highlighted matches.
+    //         assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
+    //     }
+    // }
+
+    // #[test]
+    // fn format_highlight_crop() {
+    //     let temp_index = temp_index_with_documents(None);
+    //     let rtxn = temp_index.read_txn().unwrap();
+    //     let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
+    //     let format_options = Some(FormatOptions { highlight: true, crop: Some(10) });
+
+    //     let test_values = [
+    //         // empty text.
+    //         ["", ""],
+    //         // text containing only separators.
+    //         [":-)", ":-)"],
+    //         // Text without any match.
+    //         ["A quick brown fox can not jump 32 feet, right? Brr, it is cold!",
+    //          "A quick brown fox can not jump 32 feet, right…"],
+    //         // Text containing some matches.
+    //         ["Natalie risk her future to build a world with the boy she loves.",
+    //          "…future to build a <em>world</em> with <em>the</em> boy she loves."],
+    //         // Text containing all matches.
+    //         ["Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.",
+    //          "…she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."],
+    //         // Text containing a match unordered and a match ordered.
+    //         ["The world split void void void void void void void void void split the world void void",
+    //          "…void void void void void <em>split</em> <em>the</em> <em>world</em> void void"]
+    //     ];
+
+    //     for [text, expected_text] in test_values {
+    //         let mut matcher = builder.build(text, None);
+    //         // no crop should return complete text with highlighted matches.
+    //         assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
+    //     }
+    // }
+
+    // #[test]
+    // fn format_highlight_crop_phrase_query() {
+    //     //! testing: https://github.com/meilisearch/meilisearch/issues/3975
+    //     let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
+    //     let temp_index = temp_index_with_documents(Some(documents!([
+    //         { "id": 1, "text": text }
+    //     ])));
+    //     let rtxn = temp_index.read_txn().unwrap();
+
+    //     let format_options = Some(FormatOptions { highlight: true, crop: Some(10) });
+
+    //     let test_values = [
+    //         // should return 10 words with a marker at the start as well the end, and the highlighted matches.
+    //         ["\"the world\"",
+    //          "…the power to split <em>the world</em> between those who embraced…"],
+    //         // should highlight "those" and the phrase "and those".
+    //         ["those \"and those\"",
+    //          "…world between <em>those</em> who embraced progress <em>and those</em> who resisted…"],
+    //         ["\"The groundbreaking invention had the power to split the world\"",
+    //          "<em>The groundbreaking invention had the power to split the world</em>…"],
+    //         ["\"The groundbreaking invention had the power to split the world between those\"",
+    //          "<em>The groundbreaking invention had the power to split the world</em>…"],
+    //         ["\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"",
+    //          "…between those who <em>embraced progress and those who resisted change</em>!"],
+    //         ["\"groundbreaking invention\" \"split the world between\"",
+    //          "…<em>groundbreaking invention</em> had the power to <em>split the world between</em>…"],
+    //         ["\"groundbreaking invention\" \"had the power to split the world between those\"",
+    //          "…<em>invention</em> <em>had the power to split the world between those</em>…"],
+    //     ];
+
+    //     for [query, expected_text] in test_values {
+    //         let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query);
+    //         let mut matcher = builder.build(text, None);
+
+    //         assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
+    //     }
+    // }
+
+    // #[test]
+    // fn smaller_crop_size() {
+    //     //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
+    //     let temp_index = temp_index_with_documents(None);
+    //     let rtxn = temp_index.read_txn().unwrap();
+    //     let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
+    //     let text = "void void split the world void void.";
+    //     let mut matcher = builder.build(text, None);
+
+    //     let test_values = [
+    //         // set a smaller crop size
+    //         // because crop size < query size, partially format matches.
+    //         (2, "…split the…"),
+    //         // set a smaller crop size
+    //         // because crop size < query size, partially format matches.
+    //         (1, "…split…"),
+    //         // set  crop size to 0
+    //         // because crop size is 0, crop is ignored.
+    //         (0, "void void split the world void void."),
+    //     ];
+
+    //     for (crop_size, expected_text) in test_values {
+    //         // set a smaller crop size
+    //         let format_options = Some(FormatOptions { highlight: false, crop: Some(crop_size) });
+    //         assert_eq!(matcher.get_formatted_text(format_options), Some(expected_text.to_string()));
+    //     }
+    // }
+
+    // #[test]
+    // fn partial_matches() {
+    //     let temp_index = temp_index_with_documents(None);
+    //     let rtxn = temp_index.read_txn().unwrap();
+    //     let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "the \"t he\" door \"do or\"");
+
+    //     let format_options = Some(FormatOptions { highlight: true, crop: None });
+
+    //     let text = "the do or die can't be he do and or isn't he";
+    //     let mut matcher = builder.build(text, None);
+    //     assert_eq!(
+    //         matcher.get_formatted_text(format_options),
+    //         Some(
+    //             "<em>the</em> <em>do or</em> die can't be he do and or isn'<em>t he</em>"
+    //                 .to_string()
+    //         )
+    //     );
+    // }
 }
diff --git a/crates/milli/src/search/new/query_term/mod.rs b/crates/milli/src/search/new/query_term/mod.rs
index ba8964e34..748248fc3 100644
--- a/crates/milli/src/search/new/query_term/mod.rs
+++ b/crates/milli/src/search/new/query_term/mod.rs
@@ -489,8 +489,7 @@ impl QueryTerm {
         let mut words = BTreeSet::new();
         let mut phrases = BTreeSet::new();
 
-        let ZeroTypoTerm { phrase, exact: zero_typo, prefix_of, synonyms, use_prefix_db: _ } =
-            &self.zero_typo;
+        let ZeroTypoTerm { phrase, exact: zero_typo, prefix_of, synonyms, .. } = &self.zero_typo;
         words.extend(zero_typo.iter().copied());
         words.extend(prefix_of.iter().copied());
         phrases.extend(phrase.iter().copied());