diff --git a/crates/meilisearch/src/routes/indexes/documents.rs b/crates/meilisearch/src/routes/indexes/documents.rs
index 50eec46fe..919fb0a74 100644
--- a/crates/meilisearch/src/routes/indexes/documents.rs
+++ b/crates/meilisearch/src/routes/indexes/documents.rs
@@ -1551,9 +1551,10 @@ fn retrieve_documents<S: AsRef<str>>(
             Ok(match &attributes_to_retrieve {
                 Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
                     &document?,
-                    attributes_to_retrieve.iter().map(|s| s.as_ref()).chain(
-                        (retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors"),
-                    ),
+                    attributes_to_retrieve
+                        .iter()
+                        .map(|s| s.as_ref())
+                        .chain(retrieve_vectors.should_retrieve().then_some("_vectors")),
                 ),
                 None => document?,
             })
@@ -1586,7 +1587,7 @@ fn retrieve_document<S: AsRef<str>>(
             attributes_to_retrieve
                 .iter()
                 .map(|s| s.as_ref())
-                .chain((retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors")),
+                .chain(retrieve_vectors.should_retrieve().then_some("_vectors")),
         ),
         None => document,
     };
diff --git a/crates/meilisearch/src/search/federated/perform.rs b/crates/meilisearch/src/search/federated/perform.rs
index 5ad64d63c..0c5a94b4c 100644
--- a/crates/meilisearch/src/search/federated/perform.rs
+++ b/crates/meilisearch/src/search/federated/perform.rs
@@ -815,7 +815,8 @@ impl SearchByIndex {
 
                 let (result, _semantic_hit_count) =
                     super::super::search_from_kind(index_uid.to_string(), search_kind, search)?;
-                let format = AttributesFormat {
+
+                let attributes_format = AttributesFormat {
                     attributes_to_retrieve: query.attributes_to_retrieve,
                     retrieve_vectors,
                     attributes_to_highlight: query.attributes_to_highlight,
@@ -846,12 +847,11 @@ impl SearchByIndex {
 
                 let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref());
 
-                let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
-
                 let hit_maker =
-                    HitMaker::new(&index, &rtxn, format, formatter_builder).map_err(|e| {
-                        MeilisearchHttpError::from_milli(e, Some(index_uid.to_string()))
-                    })?;
+                    HitMaker::new(matching_words, tokenizer, attributes_format, &index, &rtxn)
+                        .map_err(|e| {
+                            MeilisearchHttpError::from_milli(e, Some(index_uid.to_string()))
+                        })?;
 
                 results_by_query.push(SearchResultByQuery {
                     weight,
diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs
index 5e543c53f..8ea21cf32 100644
--- a/crates/meilisearch/src/search/mod.rs
+++ b/crates/meilisearch/src/search/mod.rs
@@ -1,4 +1,5 @@
 use core::fmt;
+use std::borrow::Cow;
 use std::cmp::min;
 use std::collections::{BTreeMap, BTreeSet, HashSet};
 use std::str::FromStr;
@@ -28,11 +29,11 @@ use meilisearch_types::{milli, Document};
 use milli::tokenizer::{Language, TokenizerBuilder};
 use milli::{
     AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, LocalizedAttributesRule,
-    MatchBounds, MatcherBuilder, SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
+    MarkerOptions, MatcherBuilder, SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
 };
 use regex::Regex;
 use serde::{Deserialize, Serialize};
-use serde_json::{json, Value};
+use serde_json::{json, Map, Value};
 #[cfg(test)]
 mod mod_test;
 use utoipa::ToSchema;
@@ -47,7 +48,9 @@ pub use federated::{
 
 mod ranking_rules;
 
-type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
+// TODO: Adapt this type to support cropping
+// { "_matchesPosition": { "overview": { first: false, highlighted: [[0,4,6,11,5,234,6,241,5]] } } }
+// type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
 
 pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
 pub const DEFAULT_SEARCH_LIMIT: fn() -> usize = || 20;
@@ -810,11 +813,9 @@ pub struct SearchHit {
     #[serde(flatten)]
     #[schema(additional_properties, inline, value_type = HashMap<String, Value>)]
     pub document: Document,
-    #[serde(default, rename = "_formatted", skip_serializing_if = "Document::is_empty")]
+    #[serde(default, rename = "_formatted", skip_serializing_if = "Option::is_none")]
     #[schema(additional_properties, value_type = HashMap<String, Value>)]
-    pub formatted: Document,
-    #[serde(default, rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
-    pub matches_position: Option<MatchesPosition>,
+    pub formatted: Option<Document>,
     #[serde(default, rename = "_rankingScore", skip_serializing_if = "Option::is_none")]
     pub ranking_score: Option<f64>,
     #[serde(default, rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")]
@@ -1291,6 +1292,7 @@ struct AttributesFormat {
     crop_marker: String,
     highlight_pre_tag: String,
     highlight_post_tag: String,
+    // TODO: Might want to rename this to signify that this will not yield _formatted anymore, only positions
     show_matches_position: bool,
     sort: Option<Vec<String>>,
     show_ranking_score: bool,
@@ -1298,7 +1300,7 @@ struct AttributesFormat {
     locales: Option<Vec<Language>>,
 }
 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[derive(Debug, Clone, Copy)]
 pub enum RetrieveVectors {
     /// Remove the `_vectors` field
     ///
@@ -1318,6 +1320,10 @@ impl RetrieveVectors {
             Self::Hide
         }
     }
+
+    pub fn should_retrieve(&self) -> bool {
+        matches!(self, Self::Retrieve)
+    }
 }
 
 struct HitMaker<'a> {
@@ -1329,7 +1335,7 @@ struct HitMaker<'a> {
     retrieve_vectors: RetrieveVectors,
     to_retrieve_ids: BTreeSet<FieldId>,
     embedding_configs: Vec<index::IndexEmbeddingConfig>,
-    formatter_builder: MatcherBuilder<'a>,
+    matcher_builder: MatcherBuilder<'a>,
     formatted_options: BTreeMap<FieldId, FormatOptions>,
     show_ranking_score: bool,
     show_ranking_score_details: bool,
@@ -1357,24 +1363,20 @@ impl<'a> HitMaker<'a> {
         tokenizer_builder.into_tokenizer()
     }
 
-    pub fn formatter_builder(
-        matching_words: milli::MatchingWords,
-        tokenizer: milli::tokenizer::Tokenizer<'_>,
-    ) -> MatcherBuilder<'_> {
-        let formatter_builder = MatcherBuilder::new(matching_words, tokenizer);
-
-        formatter_builder
-    }
-
     pub fn new(
+        matching_words: milli::MatchingWords,
+        tokenizer: milli::tokenizer::Tokenizer<'a>,
+        attr_fmt: AttributesFormat,
         index: &'a Index,
         rtxn: &'a RoTxn<'a>,
-        format: AttributesFormat,
-        mut formatter_builder: MatcherBuilder<'a>,
     ) -> milli::Result<Self> {
-        formatter_builder.crop_marker(format.crop_marker);
-        formatter_builder.highlight_prefix(format.highlight_pre_tag);
-        formatter_builder.highlight_suffix(format.highlight_post_tag);
+        let AttributesFormat { highlight_pre_tag, highlight_post_tag, crop_marker, .. } = attr_fmt;
+
+        let matcher_builder = MatcherBuilder::new(
+            matching_words,
+            tokenizer,
+            MarkerOptions { highlight_pre_tag, highlight_post_tag, crop_marker },
+        );
 
         let fields_ids_map = index.fields_ids_map(rtxn)?;
         let displayed_ids = index
@@ -1392,21 +1394,21 @@ impl<'a> HitMaker<'a> {
                 let displayed_names = index.displayed_fields(rtxn)?.unwrap();
                 !displayed_names.contains(&milli::constants::RESERVED_VECTORS_FIELD_NAME)
             }
-            // displayed_ids is a finit list, so hide if `_vectors` is not part of it
+            // displayed_ids is a finite list, so hide if `_vectors` is not part of it
             (Some(map), Some(vectors_fid)) => map.contains(&vectors_fid),
         };
 
         let displayed_ids =
             displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
 
-        let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors {
+        let retrieve_vectors = if let RetrieveVectors::Retrieve = attr_fmt.retrieve_vectors {
             if vectors_is_hidden {
                 RetrieveVectors::Hide
             } else {
                 RetrieveVectors::Retrieve
             }
         } else {
-            format.retrieve_vectors
+            attr_fmt.retrieve_vectors
         };
 
         let fids = |attrs: &BTreeSet<String>| {
@@ -1423,7 +1425,7 @@ impl<'a> HitMaker<'a> {
             }
             ids
         };
-        let to_retrieve_ids: BTreeSet<_> = format
+        let to_retrieve_ids: BTreeSet<_> = attr_fmt
             .attributes_to_retrieve
             .as_ref()
             .map(fids)
@@ -1432,12 +1434,12 @@ impl<'a> HitMaker<'a> {
             .cloned()
             .collect();
 
-        let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default();
-        let attr_to_crop = format.attributes_to_crop.unwrap_or_default();
+        let attr_to_highlight = attr_fmt.attributes_to_highlight.unwrap_or_default();
+        let attr_to_crop = attr_fmt.attributes_to_crop.unwrap_or_default();
         let formatted_options = compute_formatted_options(
             &attr_to_highlight,
             &attr_to_crop,
-            format.crop_length,
+            attr_fmt.crop_length,
             &to_retrieve_ids,
             &fields_ids_map,
             &displayed_ids,
@@ -1454,51 +1456,53 @@ impl<'a> HitMaker<'a> {
             retrieve_vectors,
             to_retrieve_ids,
             embedding_configs,
-            formatter_builder,
+            matcher_builder,
             formatted_options,
-            show_ranking_score: format.show_ranking_score,
-            show_ranking_score_details: format.show_ranking_score_details,
-            show_matches_position: format.show_matches_position,
-            sort: format.sort,
-            locales: format.locales,
+            show_ranking_score: attr_fmt.show_ranking_score,
+            show_ranking_score_details: attr_fmt.show_ranking_score_details,
+            show_matches_position: attr_fmt.show_matches_position,
+            sort: attr_fmt.sort,
+            locales: attr_fmt.locales,
         })
     }
 
-    pub fn make_hit(&self, id: u32, score: &[ScoreDetails]) -> milli::Result<SearchHit> {
-        let (_, obkv) =
-            self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
+    fn make_document(&self, obkv: &obkv::KvReaderU16) -> milli::Result<Document> {
+        let mut document = serde_json::Map::new();
 
-        // First generate a document with all the displayed fields
-        let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, obkv)?;
-
-        let add_vectors_fid =
-            self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve);
-
-        // select the attributes to retrieve
-        let attributes_to_retrieve = self
-            .to_retrieve_ids
-            .iter()
-            // skip the vectors_fid if RetrieveVectors::Hide
-            .filter(|fid| match self.vectors_fid {
-                Some(vectors_fid) => {
-                    !(self.retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid)
+        // recreate JSON with appropriate attributes
+        for (key, value) in obkv.iter() {
+            if self.vectors_fid.is_some_and(|vectors_fid| vectors_fid == key) {
+                // (vectors aren't considered in `displayedAttributes` and `attributesToRetrieve`, but rather with `retrieveVectors`)
+                if !self.retrieve_vectors.should_retrieve() {
+                    continue;
                 }
-                None => true,
-            })
-            // need to retrieve the existing `_vectors` field if the `RetrieveVectors::Retrieve`
-            .chain(add_vectors_fid.iter())
-            .map(|&fid| self.fields_ids_map.name(fid).expect("Missing field name"));
+            } else if !self.to_retrieve_ids.contains(&key) || !self.displayed_ids.contains(&key) {
+                // https://www.meilisearch.com/docs/reference/api/settings#displayed-attributes
+                // https://www.meilisearch.com/docs/reference/api/search#attributes-to-retrieve
+                continue;
+            }
 
-        let mut document =
-            permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
+            let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson)?;
+            let key = self.fields_ids_map.name(key).expect("Missing field name").to_string();
 
-        if self.retrieve_vectors == RetrieveVectors::Retrieve {
-            // Clippy is wrong
+            document.insert(key, value);
+        }
+
+        Ok(document)
+    }
+
+    pub fn make_hit(&self, id: u32, score: &[ScoreDetails]) -> milli::Result<SearchHit> {
+        let obkv = self.index.document(self.rtxn, id)?;
+
+        let mut document = self.make_document(obkv)?;
+
+        if self.retrieve_vectors.should_retrieve() {
             #[allow(clippy::manual_unwrap_or_default)]
             let mut vectors = match document.remove("_vectors") {
                 Some(Value::Object(map)) => map,
                 _ => Default::default(),
             };
+
             for (name, vector) in self.index.embeddings(self.rtxn, id)? {
                 let user_provided = self
                     .embedding_configs
@@ -1507,6 +1511,7 @@ impl<'a> HitMaker<'a> {
                     .is_some_and(|conf| conf.user_provided.contains(id));
                 let embeddings =
                     ExplicitVectors { embeddings: Some(vector.into()), regenerate: !user_provided };
+
                 vectors.insert(
                     name,
                     serde_json::to_value(embeddings).map_err(InternalError::SerdeJson)?,
@@ -1518,10 +1523,10 @@ impl<'a> HitMaker<'a> {
         let localized_attributes =
             self.index.localized_attributes_rules(self.rtxn)?.unwrap_or_default();
 
-        let (matches_position, formatted) = format_fields(
-            &displayed_document,
+        let formatted = format_fields(
+            &mut document,
             &self.fields_ids_map,
-            &self.formatter_builder,
+            &self.matcher_builder,
             &self.formatted_options,
             self.show_matches_position,
             &self.displayed_ids,
@@ -1538,13 +1543,7 @@ impl<'a> HitMaker<'a> {
         let ranking_score_details =
             self.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));
 
-        let hit = SearchHit {
-            document,
-            formatted,
-            matches_position,
-            ranking_score_details,
-            ranking_score,
-        };
+        let hit = SearchHit { document, formatted, ranking_score_details, ranking_score };
 
         Ok(hit)
     }
@@ -1553,7 +1552,7 @@ impl<'a> HitMaker<'a> {
 fn make_hits<'a>(
     index: &Index,
     rtxn: &RoTxn<'_>,
-    format: AttributesFormat,
+    attributes_format: AttributesFormat,
     matching_words: milli::MatchingWords,
     documents_ids_scores: impl Iterator<Item = (u32, &'a Vec<ScoreDetails>)> + 'a,
 ) -> milli::Result<Vec<SearchHit>> {
@@ -1568,9 +1567,7 @@ fn make_hits<'a>(
 
     let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref());
 
-    let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
-
-    let hit_maker = HitMaker::new(index, rtxn, format, formatter_builder)?;
+    let hit_maker = HitMaker::new(matching_words, tokenizer, attributes_format, index, rtxn)?;
 
     for (id, score) in documents_ids_scores {
         documents.push(hit_maker.make_hit(id, score)?);
@@ -1886,59 +1883,100 @@ fn add_non_formatted_ids_to_formatted_options(
     }
 }
 
-fn make_document(
-    displayed_attributes: &BTreeSet<FieldId>,
-    field_ids_map: &FieldsIdsMap,
-    obkv: &obkv::KvReaderU16,
-) -> milli::Result<Document> {
-    let mut document = serde_json::Map::new();
-
-    // recreate the original json
-    for (key, value) in obkv.iter() {
-        let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson)?;
-        let key = field_ids_map.name(key).expect("Missing field name").to_string();
-
-        document.insert(key, value);
-    }
-
-    // select the attributes to retrieve
-    let displayed_attributes = displayed_attributes
-        .iter()
-        .map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
-
-    let document = permissive_json_pointer::select_values(&document, displayed_attributes);
-    Ok(document)
-}
-
 #[allow(clippy::too_many_arguments)]
 fn format_fields(
-    document: &Document,
+    document: &mut Document,
     field_ids_map: &FieldsIdsMap,
-    builder: &MatcherBuilder<'_>,
+    matcher_builder: &MatcherBuilder<'_>,
     formatted_options: &BTreeMap<FieldId, FormatOptions>,
-    compute_matches: bool,
+    show_matches_position: bool,
     displayable_ids: &BTreeSet<FieldId>,
     locales: Option<&[Language]>,
     localized_attributes: &[LocalizedAttributesRule],
-) -> milli::Result<(Option<MatchesPosition>, Document)> {
-    let mut matches_position = compute_matches.then(BTreeMap::new);
-    let mut document = document.clone();
-
+) -> milli::Result<Option<Document>> {
     // reduce the formatted option list to the attributes that should be formatted,
     // instead of all the attributes to display.
-    let formatting_fields_options: Vec<_> = formatted_options
+    let formatting_fields_options = formatted_options
         .iter()
         .filter(|(_, option)| option.should_format())
         .map(|(fid, option)| (field_ids_map.name(*fid).unwrap(), option))
-        .collect();
+        .collect::<Vec<_>>();
 
     // select the attributes to retrieve
     let displayable_names =
         displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
+
+    let get_format_options = |key: Cow<'_, str>| {
+        formatting_fields_options
+            .iter()
+            .filter(|(name, ..)| {
+                milli::is_faceted_by(name, &key) || milli::is_faceted_by(&key, name)
+            })
+            .map(|(_, option)| **option)
+            .reduce(|acc, option| acc.merge(option))
+    };
+
+    let get_locales = |key: Cow<'_, str>| {
+        // TODO: Should this be recomputed every time?
+        // if no locales have been provided, we try to find the locales in the localized_attributes.
+        locales.or_else(|| {
+            localized_attributes
+                .iter()
+                .find(|rule| matches!(rule.match_str(&key), PatternMatch::Match))
+                .map(LocalizedAttributesRule::locales)
+        })
+    };
+
+    fn get_text(value: &mut Value) -> Option<Cow<'_, String>> {
+        match value {
+            Value::String(text) => Some(Cow::Borrowed(text)),
+            Value::Number(number) => Some(Cow::Owned(number.to_string())),
+            // booleans and nulls cannot be matched by meili and cannot be formatted,
+            // and arrays or objects cannot be yielded by `permissive_json_pointer::map_leaf_values`
+            _ => None,
+        }
+    }
+
+    if show_matches_position {
+        permissive_json_pointer::map_leaf_values(document, displayable_names, |key, _, value| {
+            let Some(text) = get_text(value) else {
+                *value = Value::Object(Map::from_iter(std::iter::once((
+                    "value".to_string(),
+                    value.take(),
+                ))));
+
+                return;
+            };
+
+            let locales = get_locales(Cow::from(key));
+            let mut matcher = matcher_builder.build(&text, locales);
+            let format_options = get_format_options(Cow::from(key));
+            let match_bounds = matcher.get_match_bounds(format_options);
+
+            let value_iter = std::iter::once(("value".to_string(), value.take()));
+
+            // do not include `matches` in case there is nothing to format
+            let json_map = if let Some(mb) = match_bounds {
+                let matches_iter = std::iter::once((
+                    "matches".to_string(),
+                    serde_json::to_value(mb).expect("TODO"),
+                ));
+                Map::from_iter(value_iter.chain(matches_iter))
+            } else {
+                Map::from_iter(value_iter)
+            };
+
+            *value = Value::Object(json_map);
+        });
+
+        return Ok(None);
+    }
+
+    let mut formatted_document = document.clone();
     permissive_json_pointer::map_leaf_values(
-        &mut document,
+        &mut formatted_document,
         displayable_names,
-        |key, array_indices, value| {
+        |key, _, value| {
             // To get the formatting option of each key we need to see all the rules that applies
             // to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
             // and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
@@ -1946,37 +1984,22 @@ fn format_fields(
             // Warn: The time to compute the format list scales with the number of fields to format;
             // cumulated with map_leaf_values that iterates over all the nested fields, it gives a quadratic complexity:
             // d*f where d is the total number of fields to display and f is the total number of fields to format.
-            let format = formatting_fields_options
-                .iter()
-                .filter(|(name, _option)| {
-                    milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
-                })
-                .map(|(_, option)| **option)
-                .reduce(|acc, option| acc.merge(option));
-            let mut infos = Vec::new();
+            let Some(text) = get_text(value) else {
+                return;
+            };
 
-            // if no locales has been provided, we try to find the locales in the localized_attributes.
-            let locales = locales.or_else(|| {
-                localized_attributes
-                    .iter()
-                    .find(|rule| rule.match_str(key) == PatternMatch::Match)
-                    .map(LocalizedAttributesRule::locales)
-            });
+            let format_options = get_format_options(Cow::from(key));
 
-            *value = format_value(
-                std::mem::take(value),
-                builder,
-                format,
-                &mut infos,
-                compute_matches,
-                array_indices,
-                locales,
-            );
+            // there's nothing to format
+            if !format_options.is_some_and(|v| v.should_format()) {
+                return;
+            }
 
-            if let Some(matches) = matches_position.as_mut() {
-                if !infos.is_empty() {
-                    matches.insert(key.to_owned(), infos);
-                }
+            let locales = get_locales(Cow::from(key));
+
+            let mut matcher = matcher_builder.build(&text, locales);
+            if let Some(formatted_text) = matcher.get_formatted_text(format_options) {
+                *value = Value::String(formatted_text);
             }
         },
     );
@@ -1986,58 +2009,9 @@ fn format_fields(
         // This unwrap must be safe since we got the ids from the fields_ids_map just
         // before.
         .map(|&fid| field_ids_map.name(fid).unwrap());
-    let document = permissive_json_pointer::select_values(&document, selectors);
+    let formatted_document = permissive_json_pointer::select_values(&formatted_document, selectors);
 
-    Ok((matches_position, document))
-}
-
-fn format_value(
-    value: Value,
-    builder: &MatcherBuilder<'_>,
-    format_options: Option<FormatOptions>,
-    infos: &mut Vec<MatchBounds>,
-    compute_matches: bool,
-    array_indices: &[usize],
-    locales: Option<&[Language]>,
-) -> Value {
-    match value {
-        Value::String(old_string) => {
-            let mut matcher = builder.build(&old_string, locales);
-            if compute_matches {
-                let matches = matcher.matches(array_indices);
-                infos.extend_from_slice(&matches[..]);
-            }
-
-            match format_options {
-                Some(format_options) => {
-                    let value = matcher.format(format_options);
-                    Value::String(value.into_owned())
-                }
-                None => Value::String(old_string),
-            }
-        }
-        // `map_leaf_values` makes sure this is only called for leaf fields
-        Value::Array(_) => unreachable!(),
-        Value::Object(_) => unreachable!(),
-        Value::Number(number) => {
-            let s = number.to_string();
-
-            let mut matcher = builder.build(&s, locales);
-            if compute_matches {
-                let matches = matcher.matches(array_indices);
-                infos.extend_from_slice(&matches[..]);
-            }
-
-            match format_options {
-                Some(format_options) => {
-                    let value = matcher.format(format_options);
-                    Value::String(value.into_owned())
-                }
-                None => Value::String(s),
-            }
-        }
-        value => value,
-    }
+    Ok(Some(formatted_document))
 }
 
 pub(crate) fn parse_filter(
diff --git a/crates/milli/src/lib.rs b/crates/milli/src/lib.rs
index 504b4c68d..fbaf36fe8 100644
--- a/crates/milli/src/lib.rs
+++ b/crates/milli/src/lib.rs
@@ -80,8 +80,9 @@ pub use self::localized_attributes_rules::LocalizedAttributesRule;
 pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
 pub use self::search::similar::Similar;
 pub use self::search::{
-    FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy,
-    Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
+    FacetDistribution, Filter, FormatOptions, MarkerOptions, MatchBounds, MatcherBuilder,
+    MatchingWords, OrderBy, Search, SearchResult, SemanticSearch, TermsMatchingStrategy,
+    DEFAULT_VALUES_PER_FACET,
 };
 pub use self::update::ChannelCongestion;
 
diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs
index 62183afc3..2ddb2ddb9 100644
--- a/crates/milli/src/search/mod.rs
+++ b/crates/milli/src/search/mod.rs
@@ -7,7 +7,9 @@ use once_cell::sync::Lazy;
 use roaring::bitmap::RoaringBitmap;
 
 pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
-pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
+pub use self::new::matches::{
+    FormatOptions, MarkerOptions, MatchBounds, MatcherBuilder, MatchingWords,
+};
 use self::new::{execute_vector_search, PartialSearchResult, VectorStoreStats};
 use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
 use crate::index::MatchingStrategy;
@@ -278,7 +280,7 @@ impl<'a> Search<'a> {
 
         // consume context and located_query_terms to build MatchingWords.
         let matching_words = match located_query_terms {
-            Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms),
+            Some(located_query_terms) => MatchingWords::new(ctx, &located_query_terms),
             None => MatchingWords::default(),
         };
 
diff --git a/crates/milli/src/search/new/matches/adjust_indices.rs b/crates/milli/src/search/new/matches/adjust_indices.rs
new file mode 100644
index 000000000..6c5df2ddf
--- /dev/null
+++ b/crates/milli/src/search/new/matches/adjust_indices.rs
@@ -0,0 +1,222 @@
+use std::cmp::Ordering;
+
+use charabia::{SeparatorKind, Token};
+
+/// Direction in which the token window is currently being extended.
+#[derive(Clone)]
+enum Direction {
+    Forwards,
+    Backwards,
+}
+
+impl Direction {
+    /// Flips the direction in place.
+    fn switch(&mut self) {
+        let next = if matches!(self, Self::Forwards) { Self::Backwards } else { Self::Forwards };
+        *self = next;
+    }
+}
+
+/// Grows the `[index_backward, index_forward]` token window, alternating between the forward
+/// and backward directions, until `words_count` equals `crop_size` or both ends of `tokens` are
+/// reached, then returns the adjusted `[backward, forward]` indices.
+///
+/// NOTE(review): `tokens.len() - 1` underflows for empty `tokens`; confirm callers never pass it.
+fn get_adjusted_indices_for_too_few_words(
+    tokens: &[Token],
+    mut index_backward: usize,
+    mut index_forward: usize,
+    mut words_count: usize,
+    crop_size: usize,
+) -> [usize; 2] {
+    let mut valid_index_backward = index_backward;
+    let mut valid_index_forward = index_forward;
+
+    let mut is_end_reached = index_forward == tokens.len() - 1;
+    let mut is_beginning_reached = index_backward == 0;
+
+    let mut is_index_backwards_at_hard_separator = false;
+    let mut is_index_forwards_at_hard_separator = false;
+
+    let mut is_crop_size_or_both_ends_reached =
+        words_count == crop_size || (is_end_reached && is_beginning_reached);
+
+    let mut dir = Direction::Forwards;
+
+    loop {
+        if is_crop_size_or_both_ends_reached {
+            break;
+        }
+
+        let (index, valid_index) = match dir {
+            Direction::Backwards => (&mut index_backward, &mut valid_index_backward),
+            Direction::Forwards => (&mut index_forward, &mut valid_index_forward),
+        };
+        // step in the current direction until a word is found or the walk is blocked
+        loop {
+            match dir {
+                Direction::Forwards => {
+                    if is_end_reached {
+                        break;
+                    }
+
+                    *index += 1;
+                    is_end_reached = *index == tokens.len() - 1;
+                }
+                Direction::Backwards => {
+                    if is_beginning_reached
+                        || (!is_end_reached
+                            && is_index_backwards_at_hard_separator
+                            && !is_index_forwards_at_hard_separator)
+                    {
+                        break;
+                    }
+
+                    *index -= 1;
+                    is_beginning_reached = *index == 0;
+                }
+            };
+
+            if is_end_reached && is_beginning_reached {
+                is_crop_size_or_both_ends_reached = true;
+            }
+
+            let maybe_is_token_hard_separator = tokens[*index]
+                .separator_kind()
+                .map(|sep_kind| matches!(sep_kind, SeparatorKind::Hard));
+
+            // `None` means the token is not a separator
+            if maybe_is_token_hard_separator.is_none() {
+                *valid_index = *index;
+                words_count += 1;
+                if words_count == crop_size {
+                    is_crop_size_or_both_ends_reached = true;
+                }
+                break;
+            }
+            // remember whether the separator this side stopped on is a hard one
+            let is_index_at_hard_separator = match dir {
+                Direction::Backwards => &mut is_index_backwards_at_hard_separator,
+                Direction::Forwards => &mut is_index_forwards_at_hard_separator,
+            };
+            *is_index_at_hard_separator =
+                maybe_is_token_hard_separator.is_some_and(|is_hard| is_hard);
+        }
+
+        dir.switch();
+
+        // 1. if end is reached, we can only advance backwards
+        // 2. if forwards index reached a hard separator and backwards is currently hard, we can go backwards
+    }
+
+    // keep advancing forward and backward to check whether only separator tokens are left
+    // until that end; if so, include those in the index range too
+    let saved_index = valid_index_forward;
+    loop {
+        if valid_index_forward == tokens.len() - 1 {
+            break;
+        }
+
+        valid_index_forward += 1;
+
+        if !tokens[valid_index_forward].is_separator() {
+            valid_index_forward = saved_index;
+            break;
+        }
+    }
+
+    let saved_index = valid_index_backward;
+    loop {
+        if valid_index_backward == 0 {
+            break;
+        }
+
+        valid_index_backward -= 1;
+
+        if !tokens[valid_index_backward].is_separator() {
+            valid_index_backward = saved_index;
+            break;
+        }
+    }
+
+    [valid_index_backward, valid_index_forward]
+}
+
+/// Pulls `index_forward` back towards `index_backward` until only `crop_size` words are counted.
+///
+/// Every non-separator token stepped onto decrements `words_count`; the walk stops once the
+/// count equals `crop_size` or `index_forward` meets `index_backward`.
+fn get_adjusted_index_forward_for_too_many_words(
+    tokens: &[Token],
+    index_backward: usize,
+    mut index_forward: usize,
+    mut words_count: usize,
+    crop_size: usize,
+) -> usize {
+    while index_forward != index_backward {
+        index_forward -= 1;
+
+        // separators are skipped without touching the count
+        let is_word = !tokens[index_forward].is_separator();
+        if is_word {
+            words_count -= 1;
+
+            if words_count == crop_size {
+                break;
+            }
+        }
+    }
+
+    index_forward
+}
+
+/// Adjusts the `[index_backward, index_forward]` token window towards `crop_size` words.
+pub fn get_adjusted_indices_for_highlights_and_crop_size(
+    tokens: &[Token],
+    index_backward: usize,
+    index_forward: usize,
+    words_count: usize,
+    crop_size: usize,
+) -> [usize; 2] {
+    if matches!(words_count.cmp(&crop_size), Ordering::Less | Ordering::Equal) {
+        return get_adjusted_indices_for_too_few_words(
+            tokens,
+            index_backward,
+            index_forward,
+            words_count,
+            crop_size,
+        );
+    }
+    // too many words: the backward index stays put, only the forward one is pulled back
+    let index_forward = get_adjusted_index_forward_for_too_many_words(
+        tokens,
+        index_backward,
+        index_forward,
+        words_count,
+        crop_size,
+    );
+    [index_backward, index_forward]
+}
+
+/// Returns the forward token index for a window of `crop_size` words that starts at the first
+/// token.
+///
+/// The result is the index right after the `crop_size`-th word token found before the last
+/// token, or the last index when fewer words are found. An empty `tokens` slice yields `0`
+/// (`saturating_sub` keeps `tokens.len() - 1` from underflowing).
+pub fn get_adjusted_index_forward_for_crop_size(tokens: &[Token], crop_size: usize) -> usize {
+    let last_index = tokens.len().saturating_sub(1);
+    let mut words_count = 0;
+
+    // the last token is never inspected: reaching it always ends the window there
+    for (index, token) in tokens.iter().enumerate().take(last_index) {
+        if !token.is_separator() {
+            words_count += 1;
+            if words_count == crop_size {
+                return index + 1;
+            }
+        }
+    }
+
+    last_index
+}
diff --git a/crates/milli/src/search/new/matches/best_match_interval.rs b/crates/milli/src/search/new/matches/best_match_interval.rs
deleted file mode 100644
index 1a8914e98..000000000
--- a/crates/milli/src/search/new/matches/best_match_interval.rs
+++ /dev/null
@@ -1,139 +0,0 @@
-use super::matching_words::WordId;
-use super::{Match, MatchPosition};
-
-struct MatchIntervalWithScore {
-    interval: [usize; 2],
-    score: [i16; 3],
-}
-
-// count score for phrases
-fn tally_phrase_scores(fwp: &usize, lwp: &usize, order_score: &mut i16, distance_score: &mut i16) {
-    let words_in_phrase_minus_one = (lwp - fwp) as i16;
-    // will always be ordered, so +1 for each space between words
-    *order_score += words_in_phrase_minus_one;
-    // distance will always be 1, so -1 for each space between words
-    *distance_score -= words_in_phrase_minus_one;
-}
-
-/// Compute the score of a match interval:
-/// 1) count unique matches
-/// 2) calculate distance between matches
-/// 3) count ordered matches
-fn get_interval_score(matches: &[Match]) -> [i16; 3] {
-    let mut ids: Vec<WordId> = Vec::with_capacity(matches.len());
-    let mut order_score = 0;
-    let mut distance_score = 0;
-
-    let mut iter = matches.iter().peekable();
-    while let Some(m) = iter.next() {
-        if let Some(next_match) = iter.peek() {
-            // if matches are ordered
-            if next_match.ids.iter().min() > m.ids.iter().min() {
-                order_score += 1;
-            }
-
-            let m_last_word_pos = match m.position {
-                MatchPosition::Word { word_position, .. } => word_position,
-                MatchPosition::Phrase { word_positions: [fwp, lwp], .. } => {
-                    tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score);
-                    lwp
-                }
-            };
-            let next_match_first_word_pos = next_match.get_first_word_pos();
-
-            // compute distance between matches
-            distance_score -= (next_match_first_word_pos - m_last_word_pos).min(7) as i16;
-        } else if let MatchPosition::Phrase { word_positions: [fwp, lwp], .. } = m.position {
-            // in case last match is a phrase, count score for its words
-            tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score);
-        }
-
-        ids.extend(m.ids.iter());
-    }
-
-    ids.sort_unstable();
-    ids.dedup();
-    let uniq_score = ids.len() as i16;
-
-    // rank by unique match count, then by distance between matches, then by ordered match count.
-    [uniq_score, distance_score, order_score]
-}
-
-/// Returns the first and last match where the score computed by match_interval_score is the best.
-pub fn find_best_match_interval(matches: &[Match], crop_size: usize) -> [&Match; 2] {
-    if matches.is_empty() {
-        panic!("`matches` should not be empty at this point");
-    }
-
-    // positions of the first and the last match of the best matches interval in `matches`.
-    let mut best_interval: Option<MatchIntervalWithScore> = None;
-
-    let mut save_best_interval = |interval_first, interval_last| {
-        let interval_score = get_interval_score(&matches[interval_first..=interval_last]);
-        let is_interval_score_better = &best_interval
-            .as_ref()
-            .is_none_or(|MatchIntervalWithScore { score, .. }| interval_score > *score);
-
-        if *is_interval_score_better {
-            best_interval = Some(MatchIntervalWithScore {
-                interval: [interval_first, interval_last],
-                score: interval_score,
-            });
-        }
-    };
-
-    // we compute the matches interval if we have at least 2 matches.
-    // current interval positions.
-    let mut interval_first = 0;
-    let mut interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos();
-
-    for (index, next_match) in matches.iter().enumerate() {
-        // if next match would make interval gross more than crop_size,
-        // we compare the current interval with the best one,
-        // then we increase `interval_first` until next match can be added.
-        let next_match_last_word_pos = next_match.get_last_word_pos();
-
-        // if the next match would mean that we pass the crop size window,
-        // we take the last valid match, that didn't pass this boundry, which is `index` - 1,
-        // and calculate a score for it, and check if it's better than our best so far
-        if next_match_last_word_pos - interval_first_match_first_word_pos >= crop_size {
-            // if index is 0 there is no last viable match
-            if index != 0 {
-                let interval_last = index - 1;
-                // keep interval if it's the best
-                save_best_interval(interval_first, interval_last);
-            }
-
-            // advance start of the interval while interval is longer than crop_size.
-            loop {
-                interval_first += 1;
-                if interval_first == matches.len() {
-                    interval_first -= 1;
-                    break;
-                }
-
-                interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos();
-
-                if interval_first_match_first_word_pos > next_match_last_word_pos
-                    || next_match_last_word_pos - interval_first_match_first_word_pos < crop_size
-                {
-                    break;
-                }
-            }
-        }
-    }
-
-    // compute the last interval score and compare it to the best one.
-    let interval_last = matches.len() - 1;
-    // if it's the last match with itself, we need to make sure it's
-    // not a phrase longer than the crop window
-    if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size {
-        save_best_interval(interval_first, interval_last);
-    }
-
-    // if none of the matches fit the criteria above, default to the first one
-    best_interval.map_or(
-        [&matches[0], &matches[0]],
-        |MatchIntervalWithScore { interval: [first, last], .. }| [&matches[first], &matches[last]],
-    )
-}
diff --git a/crates/milli/src/search/new/matches/best_match_range.rs b/crates/milli/src/search/new/matches/best_match_range.rs
new file mode 100644
index 000000000..6c8857cdd
--- /dev/null
+++ b/crates/milli/src/search/new/matches/best_match_range.rs
@@ -0,0 +1,169 @@
+use std::cell::Cell;
+
+use crate::search::new::matches::matching_words::QueryPosition;
+
+use super::r#match::{Match, MatchPosition};
+
+struct MatchesIndexRangeWithScore {
+    matches_index_range: [usize; 2],
+    score: [i16; 3],
+}
+
+/// Computes the score of the matches in `matches[index_first..=index_last]`:
+/// 1) uniqueness: total length of the query position ranges of these matches
+/// 2) distance: negated word distance between consecutive matches, each capped at 7
+/// 3) order: count of consecutive matches whose query position ranges start in increasing order
+fn get_score(
+    matches: &[Match],
+    query_positions: &[QueryPosition],
+    index_first: usize,
+    index_last: usize,
+) -> [i16; 3] {
+    let order_score = Cell::new(0);
+    let distance_score = Cell::new(0);
+
+    let mut iter = (index_first..=index_last)
+        .filter_map(|index| {
+            query_positions.iter().find_map(move |v| (v.index == index).then(|| v.range[0]))
+        })
+        .peekable();
+    while let (Some(range_first), Some(next_range_first)) = (iter.next(), iter.peek()) {
+        if range_first < *next_range_first {
+            order_score.set(order_score.get() + 1);
+        }
+    }
+
+    // a phrase adds to both the order and the distance score, once per gap between its words
+    let tally_phrase_scores = |fwp, lwp| {
+        let words_in_phrase_minus_one = (lwp - fwp) as i16;
+        // will always be in the order of query, so +1 for each space between words
+        order_score.set(order_score.get() + words_in_phrase_minus_one);
+        // distance will always be 1, so -1 for each space between words
+        distance_score.set(distance_score.get() - words_in_phrase_minus_one);
+    };
+
+    let mut iter = matches[index_first..=index_last].iter().peekable();
+    while let Some(r#match) = iter.next() {
+        if let Some(next_match) = iter.peek() {
+            let match_last_word_pos = match r#match.position {
+                MatchPosition::Word { word_position, .. } => word_position,
+                MatchPosition::Phrase { word_position_range: [fwp, lwp], .. } => {
+                    tally_phrase_scores(fwp, lwp);
+                    lwp
+                }
+            };
+            let next_match_first_word_pos = next_match.get_first_word_pos();
+
+            // subtract the distance in words between this match and the next one, at most 7
+            distance_score.set(
+                distance_score.get()
+                    - (next_match_first_word_pos - match_last_word_pos).min(7) as i16,
+            );
+        } else if let MatchPosition::Phrase { word_position_range: [fwp, lwp], .. } =
+            r#match.position
+        {
+            // in case last match is a phrase, count score for its words
+            tally_phrase_scores(fwp, lwp);
+        }
+    }
+
+    let mut uniqueness_score = 0i16;
+    let mut current_range: Option<super::matching_words::UserQueryPositionRange> = None;
+
+    for qp in query_positions.iter().filter(|v| v.index >= index_first && v.index <= index_last) {
+        match current_range.as_mut() {
+            Some([saved_range_start, saved_range_end]) => {
+                let [range_start, range_end] = qp.range;
+
+                if range_start > *saved_range_start {
+                    uniqueness_score += (*saved_range_end - *saved_range_start) as i16 + 1;
+
+                    *saved_range_start = range_start;
+                    *saved_range_end = range_end;
+                } else if range_end > *saved_range_end {
+                    *saved_range_end = range_end;
+                }
+            }
+            None => current_range = Some(qp.range),
+        }
+    }
+
+    if let Some([saved_range_start, saved_range_end]) = current_range {
+        uniqueness_score += (saved_range_end - saved_range_start) as i16 + 1;
+    }
+
+    // rank by uniqueness first, then by distance between matches, then by ordered match count.
+    [uniqueness_score, distance_score.into_inner(), order_score.into_inner()]
+}
+
+/// Returns the indices, in `matches`, of the first and last match of the index range that has
+/// the best score (see `get_score`) among the ranges spanning at most `crop_size` words.
+///
+/// Falls back to `[0, 0]` if no range qualifies. `matches` must not be empty, as it is indexed.
+pub fn get_best_match_index_range(
+    matches: &[Match],
+    query_positions: &[QueryPosition],
+    crop_size: usize,
+) -> [usize; 2] {
+    // best index range of `matches` found so far, together with its score.
+    let mut best_matches_index_range: Option<MatchesIndexRangeWithScore> = None;
+
+    let mut save_best_matches_index_range = |index_first, index_last| {
+        let score = get_score(matches, query_positions, index_first, index_last);
+        let is_score_better = best_matches_index_range.as_ref().is_none_or(|v| score > v.score);
+
+        if is_score_better {
+            best_matches_index_range = Some(MatchesIndexRangeWithScore {
+                matches_index_range: [index_first, index_last],
+                score,
+            });
+        }
+    };
+
+    // start of the current index range, and the word position at which it begins.
+    let mut index_first = 0;
+    let mut first_match_first_word_pos = matches[index_first].get_first_word_pos();
+
+    for (index, next_match) in matches.iter().enumerate() {
+        let next_match_last_word_pos = next_match.get_last_word_pos();
+
+        // if adding the next match would make the index range span more words than `crop_size`,
+        // the last valid range ends at `index - 1`: we compare it with the best one so far,
+        // then we increase `index_first` until the next match can be added.
+        if next_match_last_word_pos - first_match_first_word_pos + 1 > crop_size {
+            // if index is 0 there is no previous viable match
+            if index != 0 {
+                // keep index range if it's the best
+                save_best_matches_index_range(index_first, index - 1);
+            }
+
+            // advance `index_first` while index range is longer than crop_size.
+            loop {
+                if index_first == matches.len() - 1 {
+                    break;
+                }
+
+                index_first += 1;
+                first_match_first_word_pos = matches[index_first].get_first_word_pos();
+
+                // stop once the range fits again; the first check prevents a subtraction overflow
+                if next_match_last_word_pos < first_match_first_word_pos
+                    || next_match_last_word_pos - first_match_first_word_pos + 1 <= crop_size
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    // compute the last index range score and compare it to the best one.
+    let index_last = matches.len() - 1;
+    // if it's the last match with itself, we need to make sure it's
+    // not a phrase longer than the crop window
+    if index_first != index_last || matches[index_first].get_word_count() < crop_size {
+        save_best_matches_index_range(index_first, index_last);
+    }
+
+    // if none of the matches fit the criteria above, default to the first one
+    best_matches_index_range.map_or([0, 0], |v| v.matches_index_range)
+}
diff --git a/crates/milli/src/search/new/matches/match.rs b/crates/milli/src/search/new/matches/match.rs
index 2eef4d5a6..570ea2e8e 100644
--- a/crates/milli/src/search/new/matches/match.rs
+++ b/crates/milli/src/search/new/matches/match.rs
@@ -1,62 +1,49 @@
-use super::matching_words::WordId;
-
-#[derive(Clone, Debug)]
+#[derive(Debug, PartialEq)]
 pub enum MatchPosition {
-    Word {
-        // position of the word in the whole text.
-        word_position: usize,
-        // position of the token in the whole text.
-        token_position: usize,
-    },
-    Phrase {
-        // position of the first and last word in the phrase in the whole text.
-        word_positions: [usize; 2],
-        // position of the first and last token in the phrase in the whole text.
-        token_positions: [usize; 2],
-    },
+    Word { word_position: usize, token_position: usize },
+    Phrase { word_position_range: [usize; 2], token_position_range: [usize; 2] },
 }
 
-#[derive(Clone, Debug)]
+#[derive(Debug, PartialEq)]
 pub struct Match {
     pub char_count: usize,
-    // ids of the query words that matches.
-    pub ids: Vec<WordId>,
+    pub byte_len: usize,
     pub position: MatchPosition,
 }
 
 impl Match {
-    pub(super) fn get_first_word_pos(&self) -> usize {
+    pub fn get_first_word_pos(&self) -> usize {
         match self.position {
             MatchPosition::Word { word_position, .. } => word_position,
-            MatchPosition::Phrase { word_positions: [fwp, _], .. } => fwp,
+            MatchPosition::Phrase { word_position_range: [fwp, _], .. } => fwp,
         }
     }
 
-    pub(super) fn get_last_word_pos(&self) -> usize {
+    pub fn get_last_word_pos(&self) -> usize {
         match self.position {
             MatchPosition::Word { word_position, .. } => word_position,
-            MatchPosition::Phrase { word_positions: [_, lwp], .. } => lwp,
+            MatchPosition::Phrase { word_position_range: [_, lwp], .. } => lwp,
         }
     }
 
-    pub(super) fn get_first_token_pos(&self) -> usize {
+    pub fn get_first_token_pos(&self) -> usize {
         match self.position {
             MatchPosition::Word { token_position, .. } => token_position,
-            MatchPosition::Phrase { token_positions: [ftp, _], .. } => ftp,
+            MatchPosition::Phrase { token_position_range: [ftp, _], .. } => ftp,
         }
     }
 
-    pub(super) fn get_last_token_pos(&self) -> usize {
+    pub fn get_last_token_pos(&self) -> usize {
         match self.position {
             MatchPosition::Word { token_position, .. } => token_position,
-            MatchPosition::Phrase { token_positions: [_, ltp], .. } => ltp,
+            MatchPosition::Phrase { token_position_range: [_, ltp], .. } => ltp,
         }
     }
 
-    pub(super) fn get_word_count(&self) -> usize {
+    pub fn get_word_count(&self) -> usize {
         match self.position {
             MatchPosition::Word { .. } => 1,
-            MatchPosition::Phrase { word_positions: [fwp, lwp], .. } => lwp - fwp + 1,
+            MatchPosition::Phrase { word_position_range: [fwp, lwp], .. } => lwp - fwp + 1,
         }
     }
 }
diff --git a/crates/milli/src/search/new/matches/match_bounds.rs b/crates/milli/src/search/new/matches/match_bounds.rs
new file mode 100644
index 000000000..44f88b648
--- /dev/null
+++ b/crates/milli/src/search/new/matches/match_bounds.rs
@@ -0,0 +1,270 @@
+use std::cmp::{max, min};
+
+use super::{
+    matching_words::QueryPosition,
+    r#match::{Match, MatchPosition},
+};
+
+use super::adjust_indices::{
+    get_adjusted_index_forward_for_crop_size, get_adjusted_indices_for_highlights_and_crop_size,
+};
+use charabia::Token;
+use serde::Serialize;
+use utoipa::ToSchema;
+
+use super::FormatOptions;
+
+// TODO: Differentiate if full match do not return None, instead return match bounds with full length
+#[derive(Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct MatchBounds {
+    pub highlight_toggle: bool,
+    pub indices: Vec<usize>,
+}
+
+struct MatchBoundsHelper<'a> {
+    tokens: &'a [Token<'a>],
+    matches: &'a [Match],
+    query_positions: &'a [QueryPosition],
+}
+
+struct MatchesAndCropIndices {
+    matches_first_index: usize,
+    matches_last_index: usize,
+    crop_byte_start: usize,
+    crop_byte_end: usize,
+}
+
+enum CropThing {
+    Last(usize),
+    First(usize),
+}
+
+impl MatchBoundsHelper<'_> {
+    /// Returns the `[start, end]` byte positions of `r#match`.
+    ///
+    /// The start is the `byte_start` of the first token of the match (for a phrase,
+    /// the first token of the phrase); the end is that start plus the `byte_len`
+    /// of the match.
+    fn get_match_byte_position_range(&self, r#match: &Match) -> [usize; 2] {
+        let byte_start = self.tokens[r#match.get_first_token_pos()].byte_start;
+
+        [byte_start, byte_start + r#match.byte_len]
+    }
+
+    // TODO: Rename this; it yields the byte range of match `*index`, or of a neighbour cut by the crop
+    fn get_match_byte_position_rangee(
+        &self,
+        index: &mut usize,
+        crop_thing: CropThing,
+    ) -> [usize; 2] {
+        let new_index = match crop_thing {
+            CropThing::First(_) if *index != 0 => *index - 1,
+            CropThing::Last(_) if *index != self.matches.len() - 1 => *index + 1,
+            _ => {
+                return self.get_match_byte_position_range(&self.matches[*index]);
+            }
+        };
+
+        let [byte_start, byte_end] = self.get_match_byte_position_range(&self.matches[new_index]);
+
+        // NOTE: This doesn't need additional checks, because `get_best_match_index_range` already
+        // guarantees that the next or preceding match contains the crop boundary
+        match crop_thing {
+            CropThing::First(crop_byte_start) if crop_byte_start < byte_end => {
+                *index -= 1;
+                [byte_start, byte_end]
+            }
+            CropThing::Last(crop_byte_end) if byte_start < crop_byte_end => {
+                *index += 1;
+                [byte_start, byte_end]
+            }
+            _ => self.get_match_byte_position_range(&self.matches[*index]),
+        }
+    }
+
+    /// Builds the `MatchBounds` for the range of matches and the crop window described by `mci`.
+    ///
+    /// The range of matches is first widened, through `get_match_byte_position_rangee`, to the
+    /// preceding match and, unless a single match is selected, to the following one, if the crop
+    /// window reaches into them. The start of the first match and the end of the last one are
+    /// then clamped to the crop window, and `indices` is filled, in order, with:
+    /// - `crop_byte_start`, unless the first match starts exactly there,
+    /// - the start and the end byte position of each selected match,
+    /// - `crop_byte_end`, unless the last match ends exactly there.
+    ///
+    /// `highlight_toggle` is `true` when the first of the indices is the start of a match,
+    /// that is when `crop_byte_start` was not pushed in front of it.
+    fn get_match_bounds(&self, mci: MatchesAndCropIndices) -> MatchBounds {
+        let MatchesAndCropIndices {
+            mut matches_first_index,
+            mut matches_last_index,
+            crop_byte_start,
+            crop_byte_end,
+        } = mci;
+
+        let [first_start, first_end] = self.get_match_byte_position_rangee(
+            &mut matches_first_index,
+            CropThing::First(crop_byte_start),
+        );
+        let first_start = max(first_start, crop_byte_start);
+
+        // with a single selected match, the last match is the first one
+        let [last_start, last_end] = if matches_first_index == matches_last_index {
+            [first_start, first_end]
+        } else {
+            self.get_match_byte_position_rangee(
+                &mut matches_last_index,
+                CropThing::Last(crop_byte_end),
+            )
+        };
+        let last_end = min(last_end, crop_byte_end);
+
+        let has_leading_crop_index = crop_byte_start != first_start;
+        let has_trailing_crop_index = crop_byte_end != last_end;
+        let selected_matches_len = matches_last_index - matches_first_index + 1;
+
+        // two indices per selected match, plus one per crop boundary that is not a match boundary
+        let mut indices = Vec::with_capacity(
+            2 * selected_matches_len
+                + usize::from(has_leading_crop_index)
+                + usize::from(has_trailing_crop_index),
+        );
+
+        if has_leading_crop_index {
+            indices.push(crop_byte_start);
+        }
+
+        indices.push(first_start);
+
+        if selected_matches_len > 1 {
+            indices.push(first_end);
+
+            // matches strictly between the first and the last selected one
+            let inner_matches = &self.matches[(matches_first_index + 1)..matches_last_index];
+            indices.extend(
+                inner_matches.iter().flat_map(|m| self.get_match_byte_position_range(m)),
+            );
+
+            indices.push(last_start);
+        }
+
+        indices.push(last_end);
+
+        if has_trailing_crop_index {
+            indices.push(crop_byte_end);
+        }
+
+        MatchBounds { highlight_toggle: !has_leading_crop_index, indices }
+    }
+
+    /// For when there are no matches, but crop is required.
+    fn get_crop_bounds_with_no_matches(&self, crop_size: usize) -> MatchBounds {
+        let crop_token_index = get_adjusted_index_forward_for_crop_size(self.tokens, crop_size);
+
+        // TODO: Why is it that when we match all of the tokens we need to get byte_end instead of start?
+        // TODO: Can here be an error, because it's byte_start but it could be byte_end?
+        let crop_byte_end = self.tokens[crop_token_index].byte_start;
+
+        MatchBounds { highlight_toggle: false, indices: vec![0, crop_byte_end] }
+    }
+
+    /// Selects the best range of matches for a crop of `crop_size` words, and computes the
+    /// byte positions at which the text has to be cropped around that range.
+    ///
+    /// The range of matches comes from `get_best_match_index_range`. The number of words it
+    /// spans, along with the token positions of its first and last match, is then handed to
+    /// `get_adjusted_indices_for_highlights_and_crop_size`: the crop starts at the `byte_start`
+    /// of the backward token it returns, and ends at the `byte_end` of the forward one.
+    ///
+    /// Returns these two byte positions together with the indices, in `self.matches`, of the
+    /// first and last match of the selected range.
+    ///
+    /// `self.matches` must not be empty, as the selected matches are accessed by index.
+    fn get_matches_and_crop_indices(&self, crop_size: usize) -> MatchesAndCropIndices {
+        // TODO: This doesn't give back 2 phrases if one is out of crop window
+        // Solution: also get next and previous matches, and if they're in the crop window, even if partially, highlight them
+        let [matches_first_index, matches_last_index] =
+            super::best_match_range::get_best_match_index_range(
+                self.matches,
+                self.query_positions,
+                crop_size,
+            );
+
+        let first_match = &self.matches[matches_first_index];
+        let last_match = &self.matches[matches_last_index];
+
+        let last_match_last_word_pos = last_match.get_last_word_pos();
+        let first_match_first_word_pos = first_match.get_first_word_pos();
+
+        // number of words from the first word of the first match to the last word of the last one
+        let words_count = last_match_last_word_pos - first_match_first_word_pos + 1;
+        let [index_backward, index_forward] = get_adjusted_indices_for_highlights_and_crop_size(
+            self.tokens,
+            first_match.get_first_token_pos(),
+            last_match.get_last_token_pos(),
+            words_count,
+            crop_size,
+        );
+
+        // the crop is made of whole tokens: from the start of the first one to the end of the last
+        let backward_token = &self.tokens[index_backward];
+        let forward_token = &self.tokens[index_forward];
+
+        MatchesAndCropIndices {
+            matches_first_index,
+            matches_last_index,
+            crop_byte_start: backward_token.byte_start,
+            crop_byte_end: forward_token.byte_end,
+        }
+    }
+
+    /// For when there are matches, and both crop and highlight are required.
+    fn get_crop_and_highlight_bounds_with_matches(&self, crop_size: usize) -> MatchBounds {
+        self.get_match_bounds(self.get_matches_and_crop_indices(crop_size))
+    }
+
+    /// For crop but no highlight, when there are matches.
+    ///
+    /// Only the crop byte positions are returned; the selected matches are left out.
+    fn get_crop_bounds_with_matches(&self, crop_size: usize) -> MatchBounds {
+        let MatchesAndCropIndices { crop_byte_start, crop_byte_end, .. } =
+            self.get_matches_and_crop_indices(crop_size);
+
+        MatchBounds { highlight_toggle: false, indices: vec![crop_byte_start, crop_byte_end] }
+    }
+}
+
+impl MatchBounds {
+    /// Computes the bounds needed to crop and/or highlight a text, according to `format_options`.
+    ///
+    /// Returns `None` when there is nothing to do: no (non-zero) crop size is set and either
+    /// highlighting is disabled or there are no matches. Without a crop size, the bounds
+    /// cover everything up to the `byte_end` of the last token.
+    pub fn try_new(
+        tokens: &[Token],
+        matches: &[Match],
+        query_positions: &[QueryPosition],
+        format_options: FormatOptions,
+    ) -> Option<MatchBounds> {
+        let mbh = MatchBoundsHelper { tokens, matches, query_positions };
+
+        // a crop size of 0 is treated the same as no crop at all
+        let bounds = match format_options.crop.filter(|v| *v != 0) {
+            Some(crop_size) if matches.is_empty() => mbh.get_crop_bounds_with_no_matches(crop_size),
+            Some(crop_size) if format_options.highlight => {
+                mbh.get_crop_and_highlight_bounds_with_matches(crop_size)
+            }
+            Some(crop_size) => mbh.get_crop_bounds_with_matches(crop_size),
+            None if !format_options.highlight || matches.is_empty() => return None,
+            None => mbh.get_match_bounds(MatchesAndCropIndices {
+                matches_first_index: 0,
+                matches_last_index: matches.len() - 1,
+                crop_byte_start: 0,
+                crop_byte_end: tokens[tokens.len() - 1].byte_end,
+            }),
+        };
+
+        Some(bounds)
+    }
+}
diff --git a/crates/milli/src/search/new/matches/matching_words.rs b/crates/milli/src/search/new/matches/matching_words.rs
index 64235298b..3edc3eb38 100644
--- a/crates/milli/src/search/new/matches/matching_words.rs
+++ b/crates/milli/src/search/new/matches/matching_words.rs
@@ -1,24 +1,89 @@
 use std::cmp::Reverse;
-use std::fmt;
-use std::ops::RangeInclusive;
+use std::fmt::{Debug, Formatter, Result};
 
 use charabia::Token;
 
 use super::super::interner::Interned;
 use super::super::query_term::LocatedQueryTerm;
 use super::super::{DedupInterner, Phrase};
+use super::r#match::{Match, MatchPosition};
 use crate::SearchContext;
 
-pub struct LocatedMatchingPhrase {
-    pub value: Interned<Phrase>,
-    pub positions: RangeInclusive<WordId>,
+enum PrefixedOrEquality {
+    Prefixed,
+    Equality,
+    NotApplicable,
 }
 
-pub struct LocatedMatchingWords {
-    pub value: Vec<Interned<String>>,
-    pub positions: RangeInclusive<WordId>,
-    pub is_prefix: bool,
-    pub original_char_count: usize,
+impl PrefixedOrEquality {
+    /// Tells how `string` relates to `other_string`.
+    ///
+    /// - [`Self::Equality`] when both strings are the same, e.g. `("word", "word", _)`.
+    /// - [`Self::Prefixed`] when `string` starts with `other_string` but is longer than it,
+    ///   and `is_other_string_prefix` is `true`, e.g. `("words", "word", true)`.
+    /// - [`Self::NotApplicable`] in every other case, e.g. `("words", "word", false)`,
+    ///   `("word", "words", _)` or `("word", "work", _)`.
+    ///
+    /// An empty `other_string` is a prefix of any non-empty `string`, and is only equal to an
+    /// empty `string`.
+    ///
+    /// The strings are compared as they are: no normalization is applied here.
+    fn new(string: &str, other_string: &str, is_other_string_prefix: bool) -> Self {
+        match string.strip_prefix(other_string) {
+            // nothing is left of `string` once `other_string` is stripped: they are equal
+            Some("") => Self::Equality,
+            // `string` starts with `other_string` and has characters left after it
+            Some(_) if is_other_string_prefix => Self::Prefixed,
+            // either `string` does not start with `other_string`, or it does but
+            // `other_string` is not allowed to act as a prefix
+            _ => Self::NotApplicable,
+        }
+    }
+}
+
+// TODO: Consider using a tuple here, because indexing this thing out of bounds only incurs a runtime error
+pub type UserQueryPositionRange = [u16; 2];
+
+struct LocatedMatchingPhrase {
+    value: Interned<Phrase>,
+    position: UserQueryPositionRange,
+}
+
+struct LocatedMatchingWords {
+    value: Vec<Interned<String>>,
+    position: UserQueryPositionRange,
+    is_prefix: bool,
+    original_char_count: usize,
+}
+
+struct TokenPositionHelper<'a> {
+    token: &'a Token<'a>,
+    position_by_word: usize,
+    position_by_token: usize,
+}
+
+impl<'a> TokenPositionHelper<'a> {
+    /// Iterates over the tokens of `tokens` that are not separators.
+    ///
+    /// Each yielded item holds, besides the token itself:
+    /// - `position_by_token`: the index of the token in `tokens`, separators included,
+    /// - `position_by_word`: the number of non-separator tokens that come before it.
+    ///
+    /// Both positions start at 0.
+    fn iter_from_tokens(tokens: &'a [Token]) -> impl Iterator<Item = Self> + Clone {
+        tokens
+            .iter()
+            // first `enumerate`: position among all of the tokens
+            .enumerate()
+            .filter(|(_, token)| !token.is_separator())
+            // second `enumerate`: position among the tokens that are left, i.e. the words
+            .enumerate()
+            .map(|(position_by_word, (position_by_token, token))| Self {
+                token,
+                position_by_token,
+                position_by_word,
+            })
+    }
 }
 
 /// Structure created from a query tree
@@ -27,180 +92,263 @@ pub struct LocatedMatchingWords {
 pub struct MatchingWords {
     word_interner: DedupInterner<String>,
     phrase_interner: DedupInterner<Phrase>,
-    phrases: Vec<LocatedMatchingPhrase>,
-    words: Vec<LocatedMatchingWords>,
+    /// Phrases collected from the query terms, in the order the terms were given to `new`.
+    located_matching_phrases: Vec<LocatedMatchingPhrase>,
+    /// Candidate words of the query terms, sorted by `new` so that non-prefix terms come first.
+    located_matching_words: Vec<LocatedMatchingWords>,
+}
+
+/// Links a match to the range of user-query positions of the term it satisfies.
+#[cfg_attr(test, derive(Debug, PartialEq))]
+pub struct QueryPosition {
+    /// Start and end positions of the matched term in the user query.
+    pub range: UserQueryPositionRange,
+    /// Index of the corresponding `Match` in the list of matches returned alongside.
+    pub index: usize,
 }
 
 impl MatchingWords {
-    pub fn new(ctx: SearchContext<'_>, located_terms: Vec<LocatedQueryTerm>) -> Self {
-        let mut phrases = Vec::new();
-        let mut words = Vec::new();
+    /// Builds the matching words from the located terms of a query.
+    ///
+    /// For every term, its computed derivations are split into phrases and words, each stored
+    /// with the `[start, end]` query positions of the term. The word and phrase interners of
+    /// `ctx` are moved into the returned value.
+    pub fn new(ctx: SearchContext, located_terms: &[LocatedQueryTerm]) -> Self {
+        let mut located_matching_phrases = Vec::new();
+        let mut located_matching_words = Vec::new();
 
         // Extract and centralize the different phrases and words to match stored in a QueryTerm
         // and wrap them in dedicated structures.
-        for located_term in located_terms {
-            let term = ctx.term_interner.get(located_term.value);
+        for LocatedQueryTerm { value, positions } in located_terms {
+            let term = ctx.term_interner.get(*value);
             let (matching_words, matching_phrases) = term.all_computed_derivations();
 
-            for matching_phrase in matching_phrases {
-                phrases.push(LocatedMatchingPhrase {
-                    value: matching_phrase,
-                    positions: located_term.positions.clone(),
-                });
+            // the same position range is shared by every phrase and word derived from this term
+            let position = [*positions.start(), *positions.end()];
+
+            if !matching_phrases.is_empty() {
+                located_matching_phrases.reserve(matching_phrases.len());
+                located_matching_phrases.extend(matching_phrases.iter().map(|matching_phrase| {
+                    LocatedMatchingPhrase { value: *matching_phrase, position }
+                }));
             }
 
-            words.push(LocatedMatchingWords {
-                value: matching_words,
-                positions: located_term.positions.clone(),
-                is_prefix: term.is_prefix(),
-                original_char_count: term.original_word(&ctx).chars().count(),
-            });
+            // terms without any word derivation do not get an entry
+            if !matching_words.is_empty() {
+                located_matching_words.push(LocatedMatchingWords {
+                    value: matching_words,
+                    position,
+                    is_prefix: term.is_prefix(),
+                    original_char_count: term.original_word(&ctx).chars().count(),
+                });
+            }
         }
 
-        // Sort word to put prefixes at the bottom prioritizing the exact matches.
-        words.sort_unstable_by_key(|lmw| (lmw.is_prefix, Reverse(lmw.positions.len())));
+        // Sort words by having `is_prefix` as false first and then by the width of their query
+        // position range (`end - start`), widest first.
+        // This is only meant to help with what we match a token against first.
+        located_matching_words.sort_unstable_by_key(|lmw| {
+            (lmw.is_prefix, Reverse(lmw.position[1] - lmw.position[0]))
+        });
 
         Self {
-            phrases,
-            words,
+            located_matching_phrases,
+            located_matching_words,
             word_interner: ctx.word_interner,
             phrase_interner: ctx.phrase_interner,
         }
     }
 
-    /// Returns an iterator over terms that match or partially match the given token.
-    pub fn match_token<'a, 'b>(&'a self, token: &'b Token<'b>) -> MatchesIter<'a, 'b> {
-        MatchesIter { matching_words: self, phrases: Box::new(self.phrases.iter()), token }
+    /// Tries to match one of the phrases against the next items of `token_position_helper_iter`.
+    ///
+    /// Phrases are tried in the order they are stored. Each attempt works on a clone of the
+    /// iterator, so a failed attempt consumes nothing. On the first phrase whose words all match
+    /// consecutive items, the iterator is advanced past the matched items and the match is
+    /// returned together with the query position range of that phrase.
+    ///
+    /// Returns `None`, leaving the iterator untouched, when no phrase matches at this position.
+    fn try_get_phrase_match<'a>(
+        &self,
+        token_position_helper_iter: &mut (impl Iterator<Item = TokenPositionHelper<'a>> + Clone),
+    ) -> Option<(Match, UserQueryPositionRange)> {
+        let mut mapped_phrase_iter = self.located_matching_phrases.iter().map(|lmp| {
+            let words = &self.phrase_interner.get(lmp.value).words;
+
+            let words_iter = words
+                .iter()
+                .map(|maybe_word| maybe_word.map(|word| self.word_interner.get(word).as_str()))
+                .peekable();
+
+            (lmp.position, words_iter)
+        });
+
+        'outer: loop {
+            // no phrase left to try: nothing matches at the current position
+            let (query_position_range, mut words_iter) = mapped_phrase_iter.next()?;
+
+            // work on a copy, so that a failed attempt leaves the caller's iterator untouched
+            // TODO: if it's worth it, clone only if we have to
+            let mut tph_iter = token_position_helper_iter.clone();
+
+            let mut recorded_first_details = None;
+            let (first_tph_details, last_tph_details) = loop {
+                // 1. take the next phrase word and the next token together
+                let (Some(word), Some(tph)) = (words_iter.next(), tph_iter.next()) else {
+                    // 2. the phrase or the tokens ran out before a full match: try the next
+                    //    phrase, starting again from a fresh copy of the caller's iterator
+                    continue 'outer;
+                };
+
+                // remember where the phrase starts; only the first token of the attempt is kept
+                let first_tph_details = *recorded_first_details.get_or_insert([
+                    tph.position_by_token,
+                    tph.position_by_word,
+                    tph.token.char_start,
+                    tph.token.byte_start,
+                ]);
+
+                // 3. check if the phrase word matches the token
+                let is_matching = match word {
+                    Some(word) => tph.token.lemma() == word,
+                    // a `None` value in the phrase words iterator corresponds to a stop word,
+                    // the value is considered a match if the current token is categorized as a stop word.
+                    None => tph.token.is_stopword(),
+                };
+
+                // 4. if it does not, try the next phrase
+                if !is_matching {
+                    continue 'outer;
+                }
+
+                // 5. if it does, and there are no phrase words left, the phrase is fully matched
+                if words_iter.peek().is_none() {
+                    break (
+                        first_tph_details,
+                        [
+                            tph.position_by_token,
+                            tph.position_by_word,
+                            tph.token.char_end,
+                            tph.token.byte_end,
+                        ],
+                    );
+                }
+            };
+
+            let [first_tph_position_by_token, first_tph_position_by_word, first_tph_char_start, first_tph_byte_start] =
+                first_tph_details;
+            let [last_tph_position_by_token, last_tph_position_by_word, last_tph_char_end, last_tph_byte_end] =
+                last_tph_details;
+
+            // commit: the caller's iterator now resumes right after the matched phrase
+            *token_position_helper_iter = tph_iter;
+
+            return Some((
+                Match {
+                    // do not +1, because Token index ranges are exclusive
+                    byte_len: last_tph_byte_end - first_tph_byte_start,
+                    char_count: last_tph_char_end - first_tph_char_start,
+                    position: MatchPosition::Phrase {
+                        word_position_range: [
+                            first_tph_position_by_word,
+                            last_tph_position_by_word,
+                        ],
+                        token_position_range: [
+                            first_tph_position_by_token,
+                            last_tph_position_by_token,
+                        ],
+                    },
+                },
+                query_position_range,
+            ));
+        }
     }
 
     /// Try to match the token with one of the located_words.
-    fn match_unique_words<'a>(&'a self, token: &Token<'_>) -> Option<MatchType<'a>> {
-        for located_words in &self.words {
-            for word in &located_words.value {
-                let word = self.word_interner.get(*word);
-                // if the word is a prefix we match using starts_with.
-                if located_words.is_prefix && token.lemma().starts_with(word) {
-                    let Some((char_index, c)) =
-                        word.char_indices().take(located_words.original_char_count).last()
-                    else {
-                        continue;
+    ///
+    /// Candidate words are tried in their stored order and the first one that applies wins:
+    /// - a word equal to the token's lemma yields a match covering the whole token;
+    /// - for prefix terms, a word the lemma starts with yields a match covering the first
+    ///   `original_char_count` characters of the token in `text`, clamped to the token itself.
+    ///
+    /// Returns the match together with the query position range of the matched term, or `None`
+    /// when no candidate applies.
+    fn try_get_word_match(
+        &self,
+        tph: TokenPositionHelper,
+        text: &str,
+    ) -> Option<(Match, UserQueryPositionRange)> {
+        // TODO: There is potentially an optimization to be made here
+        // if we matched a term then we can skip checking it for further iterations?
+
+        self.located_matching_words
+            .iter()
+            .flat_map(|lw| lw.value.iter().map(move |w| (lw, self.word_interner.get(*w))))
+            .find_map(|(located_words, word)| {
+                let [char_count, byte_len] =
+                    match PrefixedOrEquality::new(tph.token.lemma(), word, located_words.is_prefix)
+                    {
+                        PrefixedOrEquality::Prefixed => {
+                            // Never cover more characters than the token holds: the original
+                            // query word can be longer than the token (e.g. when the candidate
+                            // word is shorter than what was typed).
+                            let token_char_count = tph.token.char_end - tph.token.char_start;
+                            let prefix_char_count =
+                                located_words.original_char_count.min(token_char_count);
+
+                            // nothing to cover: let the next candidate have a go
+                            if prefix_char_count == 0 {
+                                return None;
+                            }
+
+                            // byte length of the first `prefix_char_count` characters of the
+                            // token in the original text; if the text ends first, stop there
+                            let text_from_token = text.get(tph.token.byte_start..)?;
+                            let prefix_byte_len = text_from_token
+                                .char_indices()
+                                .nth(prefix_char_count)
+                                .map_or(text_from_token.len(), |(i, _)| i);
+
+                            // TODO: Investigate token original byte length and similar methods and why they're not good enough
+                            //       That might be because token original byte length only or could also refer to the normalized byte length
+
+                            [prefix_char_count, prefix_byte_len]
+                        }
+                        // do not +1, because Token index ranges are exclusive
+                        PrefixedOrEquality::Equality => [
+                            tph.token.char_end - tph.token.char_start,
+                            tph.token.byte_end - tph.token.byte_start,
+                        ],
+                        _ => return None,
                     };
-                    let prefix_length = char_index + c.len_utf8();
-                    let (char_count, byte_len) = token.original_lengths(prefix_length);
-                    let ids = &located_words.positions;
-                    return Some(MatchType::Full { ids, char_count, byte_len });
-                // else we exact match the token.
-                } else if token.lemma() == word {
-                    let ids = &located_words.positions;
-                    return Some(MatchType::Full {
-                        char_count: token.char_end - token.char_start,
-                        byte_len: token.byte_end - token.byte_start,
-                        ids,
-                    });
-                }
-            }
-        }
 
-        None
-    }
-}
-
-/// Iterator over terms that match the given token,
-/// This allow to lazily evaluate matches.
-pub struct MatchesIter<'a, 'b> {
-    matching_words: &'a MatchingWords,
-    phrases: Box<dyn Iterator<Item = &'a LocatedMatchingPhrase> + 'a>,
-    token: &'b Token<'b>,
-}
-
-impl<'a> Iterator for MatchesIter<'a, '_> {
-    type Item = MatchType<'a>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        match self.phrases.next() {
-            // Try to match all the phrases first.
-            Some(located_phrase) => {
-                let phrase = self.matching_words.phrase_interner.get(located_phrase.value);
-
-                // create a PartialMatch struct to make it compute the first match
-                // instead of duplicating the code.
-                let ids = &located_phrase.positions;
-                // collect the references of words from the interner.
-                let words = phrase
-                    .words
-                    .iter()
-                    .map(|word| {
-                        word.map(|word| self.matching_words.word_interner.get(word).as_str())
-                    })
-                    .collect();
-                let partial = PartialMatch { matching_words: words, ids };
-
-                partial.match_token(self.token).or_else(|| self.next())
-            }
-            // If no phrases matches, try to match uiques words.
-            None => self.matching_words.match_unique_words(self.token),
-        }
-    }
-}
-
-/// Id of a matching term corespounding to a word written by the end user.
-pub type WordId = u16;
-
-/// A given token can partially match a query word for several reasons:
-/// - split words
-/// - multi-word synonyms
-///   In these cases we need to match consecutively several tokens to consider that the match is full.
-#[derive(Debug, PartialEq)]
-pub enum MatchType<'a> {
-    Full { char_count: usize, byte_len: usize, ids: &'a RangeInclusive<WordId> },
-    Partial(PartialMatch<'a>),
-}
-
-/// Structure helper to match several tokens in a row in order to complete a partial match.
-#[derive(Debug, PartialEq)]
-pub struct PartialMatch<'a> {
-    matching_words: Vec<Option<&'a str>>,
-    ids: &'a RangeInclusive<WordId>,
-}
-
-impl<'a> PartialMatch<'a> {
-    /// Returns:
-    /// - None if the given token breaks the partial match
-    /// - Partial if the given token matches the partial match but doesn't complete it
-    /// - Full if the given token completes the partial match
-    pub fn match_token(self, token: &Token<'_>) -> Option<MatchType<'a>> {
-        let Self { mut matching_words, ids, .. } = self;
-
-        let is_matching = match matching_words.first()? {
-            Some(word) => &token.lemma() == word,
-            // a None value in the phrase corresponds to a stop word,
-            // the walue is considered a match if the current token is categorized as a stop word.
-            None => token.is_stopword(),
-        };
-
-        // if there are remaining words to match in the phrase and the current token is matching,
-        // return a new Partial match allowing the highlighter to continue.
-        if is_matching && matching_words.len() > 1 {
-            matching_words.remove(0);
-            Some(MatchType::Partial(Self { matching_words, ids }))
-        // if there is no remaining word to match in the phrase and the current token is matching,
-        // return a Full match.
-        } else if is_matching {
-            Some(MatchType::Full {
-                char_count: token.char_end - token.char_start,
-                byte_len: token.byte_end - token.byte_start,
-                ids,
+                Some((
+                    Match {
+                        char_count,
+                        byte_len,
+                        position: MatchPosition::Word {
+                            word_position: tph.position_by_word,
+                            token_position: tph.position_by_token,
+                        },
+                    },
+                    located_words.position,
+                ))
             })
-        // if the current token doesn't match, return None to break the match sequence.
-        } else {
-            None
+    }
+
+    /// Walks `tokens` once and collects every phrase or word match found in them.
+    ///
+    /// At each step a phrase match is attempted first; only when none applies is the next
+    /// token taken and checked as a single word. Matches are pushed as the tokens are walked,
+    /// so they come out in token order.
+    ///
+    /// Returns the matches and, for each of them, a `QueryPosition` holding its index in the
+    /// matches list. The query positions are sorted by the start of their query range.
+    pub fn get_matches_and_query_positions(
+        &self,
+        tokens: &[Token],
+        text: &str,
+    ) -> (Vec<Match>, Vec<QueryPosition>) {
+        let mut remaining_tokens = TokenPositionHelper::iter_from_tokens(tokens);
+        let mut matches = Vec::new();
+        let mut query_positions = Vec::new();
+
+        loop {
+            let found = match self.try_get_phrase_match(&mut remaining_tokens) {
+                // a phrase matched; the iterator has already been moved past it
+                Some(phrase_match) => Some(phrase_match),
+                None => match remaining_tokens.next() {
+                    // no phrase starts here: check the next token as a single word
+                    Some(tph) => self.try_get_word_match(tph, text),
+                    // there are no more items in the iterator, we are done searching for matches
+                    None => break,
+                },
+            };
+
+            if let Some((r#match, range)) = found {
+                // the length before the push is the index the match is about to get
+                query_positions.push(QueryPosition { range, index: matches.len() });
+                matches.push(r#match);
+            }
         }
+
+        // TODO: Explain why
+        query_positions.sort_unstable_by_key(|v| v.range[0]);
+
+        (matches, query_positions)
     }
 }
 
-impl fmt::Debug for MatchingWords {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let MatchingWords { word_interner, phrase_interner, phrases, words } = self;
+impl Debug for MatchingWords {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        let MatchingWords {
+            word_interner,
+            phrase_interner,
+            located_matching_phrases: phrases,
+            located_matching_words: words,
+        } = self;
 
         let phrases: Vec<_> = phrases
             .iter()
@@ -213,37 +361,33 @@ impl fmt::Debug for MatchingWords {
                         .map(|w| w.map_or("STOP_WORD", |w| word_interner.get(w)))
                         .collect::<Vec<_>>()
                         .join(" "),
-                    p.positions.clone(),
+                    p.position,
                 )
             })
             .collect();
-
         let words: Vec<_> = words
             .iter()
             .flat_map(|w| {
                 w.value
                     .iter()
-                    .map(|s| (word_interner.get(*s), w.positions.clone(), w.is_prefix))
+                    .map(|s| (word_interner.get(*s), w.position, w.is_prefix))
                     .collect::<Vec<_>>()
             })
             .collect();
-
         f.debug_struct("MatchingWords").field("phrases", &phrases).field("words", &words).finish()
     }
 }
 
 #[cfg(test)]
-pub(crate) mod tests {
-    use std::borrow::Cow;
-
-    use charabia::{TokenKind, TokenizerBuilder};
-
+mod tests {
     use super::super::super::located_query_terms_from_tokens;
     use super::*;
     use crate::index::tests::TempIndex;
     use crate::search::new::query_term::ExtractedTokens;
+    use charabia::{TokenKind, TokenizerBuilder};
+    use std::borrow::Cow;
 
-    pub(crate) fn temp_index_with_documents() -> TempIndex {
+    fn temp_index_with_documents() -> TempIndex {
         let temp_index = TempIndex::new();
         temp_index
             .add_documents(documents!([
@@ -262,70 +406,77 @@ pub(crate) mod tests {
         let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap();
         let mut builder = TokenizerBuilder::default();
         let tokenizer = builder.build();
-        let tokens = tokenizer.tokenize("split this world");
+        let text = "split this world";
+        let tokens = tokenizer.tokenize(text);
         let ExtractedTokens { query_terms, .. } =
             located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap();
-        let matching_words = MatchingWords::new(ctx, query_terms);
+        let matching_words = MatchingWords::new(ctx, &query_terms);
 
         assert_eq!(
-            matching_words
-                .match_token(&Token {
-                    kind: TokenKind::Word,
-                    lemma: Cow::Borrowed("split"),
-                    char_end: "split".chars().count(),
-                    byte_end: "split".len(),
-                    ..Default::default()
-                })
-                .next(),
-            Some(MatchType::Full { char_count: 5, byte_len: 5, ids: &(0..=0) })
-        );
-        assert_eq!(
-            matching_words
-                .match_token(&Token {
-                    kind: TokenKind::Word,
-                    lemma: Cow::Borrowed("nyc"),
-                    char_end: "nyc".chars().count(),
-                    byte_end: "nyc".len(),
-                    ..Default::default()
-                })
-                .next(),
-            None
-        );
-        assert_eq!(
-            matching_words
-                .match_token(&Token {
-                    kind: TokenKind::Word,
-                    lemma: Cow::Borrowed("world"),
-                    char_end: "world".chars().count(),
-                    byte_end: "world".len(),
-                    ..Default::default()
-                })
-                .next(),
-            Some(MatchType::Full { char_count: 5, byte_len: 5, ids: &(2..=2) })
-        );
-        assert_eq!(
-            matching_words
-                .match_token(&Token {
-                    kind: TokenKind::Word,
-                    lemma: Cow::Borrowed("worlded"),
-                    char_end: "worlded".chars().count(),
-                    byte_end: "worlded".len(),
-                    ..Default::default()
-                })
-                .next(),
-            Some(MatchType::Full { char_count: 5, byte_len: 5, ids: &(2..=2) })
-        );
-        assert_eq!(
-            matching_words
-                .match_token(&Token {
-                    kind: TokenKind::Word,
-                    lemma: Cow::Borrowed("thisnew"),
-                    char_end: "thisnew".chars().count(),
-                    byte_end: "thisnew".len(),
-                    ..Default::default()
-                })
-                .next(),
-            None
+            matching_words.get_matches_and_query_positions(
+                &[
+                    Token {
+                        kind: TokenKind::Word,
+                        lemma: Cow::Borrowed("split"),
+                        char_end: "split".chars().count(),
+                        byte_end: "split".len(),
+                        ..Default::default()
+                    },
+                    Token {
+                        kind: TokenKind::Word,
+                        lemma: Cow::Borrowed("nyc"),
+                        char_end: "nyc".chars().count(),
+                        byte_end: "nyc".len(),
+                        ..Default::default()
+                    },
+                    Token {
+                        kind: TokenKind::Word,
+                        lemma: Cow::Borrowed("world"),
+                        char_end: "world".chars().count(),
+                        byte_end: "world".len(),
+                        ..Default::default()
+                    },
+                    Token {
+                        kind: TokenKind::Word,
+                        lemma: Cow::Borrowed("worlded"),
+                        char_end: "worlded".chars().count(),
+                        byte_end: "worlded".len(),
+                        ..Default::default()
+                    },
+                    Token {
+                        kind: TokenKind::Word,
+                        lemma: Cow::Borrowed("thisnew"),
+                        char_end: "thisnew".chars().count(),
+                        byte_end: "thisnew".len(),
+                        ..Default::default()
+                    }
+                ],
+                text
+            ),
+            (
+                vec![
+                    Match {
+                        char_count: 5,
+                        byte_len: 5,
+                        position: MatchPosition::Word { word_position: 0, token_position: 0 }
+                    },
+                    Match {
+                        char_count: 5,
+                        byte_len: 5,
+                        position: MatchPosition::Word { word_position: 2, token_position: 2 }
+                    },
+                    Match {
+                        char_count: 5,
+                        byte_len: 5,
+                        position: MatchPosition::Word { word_position: 3, token_position: 3 }
+                    }
+                ],
+                vec![
+                    QueryPosition { range: [0, 0], index: 0 },
+                    QueryPosition { range: [2, 2], index: 1 },
+                    QueryPosition { range: [2, 2], index: 2 }
+                ]
+            )
         );
     }
 }
diff --git a/crates/milli/src/search/new/matches/mod.rs b/crates/milli/src/search/new/matches/mod.rs
index 2d6f2cf17..f47582af7 100644
--- a/crates/milli/src/search/new/matches/mod.rs
+++ b/crates/milli/src/search/new/matches/mod.rs
@@ -1,92 +1,54 @@
-mod best_match_interval;
+mod adjust_indices;
+mod best_match_range;
 mod r#match;
+mod match_bounds;
 mod matching_words;
-mod simple_token_kind;
 
-use std::borrow::Cow;
-use std::cmp::{max, min};
-
-use charabia::{Language, SeparatorKind, Token, Tokenizer};
-use either::Either;
-use itertools::Itertools;
+use charabia::{Language, Token, Tokenizer};
+pub use match_bounds::MatchBounds;
 pub use matching_words::MatchingWords;
-use matching_words::{MatchType, PartialMatch};
-use r#match::{Match, MatchPosition};
-use serde::{Deserialize, Serialize};
-use simple_token_kind::SimpleTokenKind;
-use utoipa::ToSchema;
+use matching_words::QueryPosition;
+use r#match::Match;
 
-const DEFAULT_CROP_MARKER: &str = "…";
-const DEFAULT_HIGHLIGHT_PREFIX: &str = "<em>";
-const DEFAULT_HIGHLIGHT_SUFFIX: &str = "</em>";
-
-/// Structure used to build a Matcher allowing to customize formatting tags.
-pub struct MatcherBuilder<'m> {
-    matching_words: MatchingWords,
-    tokenizer: Tokenizer<'m>,
-    crop_marker: Option<String>,
-    highlight_prefix: Option<String>,
-    highlight_suffix: Option<String>,
+/// Marker strings handed to a `MatcherBuilder`.
+// NOTE(review): the field descriptions below are inferred from the field names; how each
+// marker is consumed is not visible from here — confirm against the formatting code.
+pub struct MarkerOptions {
+    /// Presumably the tag written before a highlighted match.
+    pub highlight_pre_tag: String,
+    /// Presumably the tag written after a highlighted match.
+    pub highlight_post_tag: String,
+    /// Presumably the marker written where text has been cropped.
+    pub crop_marker: String,
 }
 
-impl<'m> MatcherBuilder<'m> {
-    pub fn new(matching_words: MatchingWords, tokenizer: Tokenizer<'m>) -> Self {
-        Self {
-            matching_words,
-            tokenizer,
-            crop_marker: None,
-            highlight_prefix: None,
-            highlight_suffix: None,
-        }
-    }
+/// Structure used to build a Matcher allowing to customize formatting tags.
+///
+/// The builder owns everything below; `build` lends each of them to the `Matcher` it creates.
+pub struct MatcherBuilder<'a> {
+    /// Words and phrases to look for.
+    matching_words: MatchingWords,
+    /// Tokenizer lent to the matchers.
+    tokenizer: Tokenizer<'a>,
+    /// Highlight and crop markers lent to the matchers.
+    marker_options: MarkerOptions,
+}
 
-    pub fn crop_marker(&mut self, marker: String) -> &Self {
-        self.crop_marker = Some(marker);
-        self
-    }
-
-    pub fn highlight_prefix(&mut self, prefix: String) -> &Self {
-        self.highlight_prefix = Some(prefix);
-        self
-    }
-
-    pub fn highlight_suffix(&mut self, suffix: String) -> &Self {
-        self.highlight_suffix = Some(suffix);
-        self
+impl<'a> MatcherBuilder<'a> {
+    /// Creates a builder that stores the given matching words, tokenizer and marker options
+    /// unchanged.
+    pub fn new(
+        matching_words: MatchingWords,
+        tokenizer: Tokenizer<'a>,
+        marker_options: MarkerOptions,
+    ) -> Self {
+        Self { matching_words, tokenizer, marker_options }
     }
 
+    /// Creates a `Matcher` for `text`.
+    ///
+    /// The matcher borrows the builder's matching words, tokenizer and marker options, keeps
+    /// `locales` as given, and starts with `tokens_matches_and_query_positions` set to `None`.
     pub fn build<'t, 'lang>(
         &self,
         text: &'t str,
         locales: Option<&'lang [Language]>,
-    ) -> Matcher<'t, 'm, '_, 'lang> {
-        let crop_marker = match &self.crop_marker {
-            Some(marker) => marker.as_str(),
-            None => DEFAULT_CROP_MARKER,
-        };
-
-        let highlight_prefix = match &self.highlight_prefix {
-            Some(marker) => marker.as_str(),
-            None => DEFAULT_HIGHLIGHT_PREFIX,
-        };
-        let highlight_suffix = match &self.highlight_suffix {
-            Some(marker) => marker.as_str(),
-            None => DEFAULT_HIGHLIGHT_SUFFIX,
-        };
+    ) -> Matcher<'t, 'a, '_, 'lang> {
         Matcher {
             text,
             matching_words: &self.matching_words,
             tokenizer: &self.tokenizer,
-            crop_marker,
-            highlight_prefix,
-            highlight_suffix,
-            matches: None,
+            marker_options: &self.marker_options,
+            tokens_matches_and_query_positions: None,
             locales,
         }
     }
 }
 
-#[derive(Copy, Clone, Default, Debug)]
+#[derive(Copy, Clone, Default)]
 pub struct FormatOptions {
     pub highlight: bool,
     pub crop: Option<usize>,
@@ -102,14 +64,6 @@ impl FormatOptions {
     }
 }
 
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)]
-pub struct MatchBounds {
-    pub start: usize,
-    pub length: usize,
-    #[serde(skip_serializing_if = "Option::is_none", default)]
-    pub indices: Option<Vec<usize>>,
-}
-
 /// Structure used to analyze a string, compute words that match,
 /// and format the source string, returning a highlighted and cropped sub-string.
 pub struct Matcher<'t, 'tokenizer, 'b, 'lang> {
@@ -117,388 +71,88 @@ pub struct Matcher<'t, 'tokenizer, 'b, 'lang> {
     matching_words: &'b MatchingWords,
     tokenizer: &'b Tokenizer<'tokenizer>,
     locales: Option<&'lang [Language]>,
-    crop_marker: &'b str,
-    highlight_prefix: &'b str,
-    highlight_suffix: &'b str,
-    matches: Option<(Vec<Token<'t>>, Vec<Match>)>,
+    marker_options: &'b MarkerOptions,
+    tokens_matches_and_query_positions: Option<((Vec<Match>, Vec<QueryPosition>), Vec<Token<'t>>)>,
 }
 
-impl<'t> Matcher<'t, '_, '_, '_> {
-    /// Iterates over tokens and save any of them that matches the query.
-    fn compute_matches(&mut self) -> &mut Self {
-        /// some words are counted as matches only if they are close together and in the good order,
-        /// compute_partial_match peek into next words to validate if the match is complete.
-        fn compute_partial_match<'a>(
-            mut partial: PartialMatch<'a>,
-            first_token_position: usize,
-            first_word_position: usize,
-            first_word_char_start: &usize,
-            words_positions: &mut impl Iterator<Item = (usize, usize, &'a Token<'a>)>,
-            matches: &mut Vec<Match>,
-        ) -> bool {
-            for (token_position, word_position, word) in words_positions {
-                partial = match partial.match_token(word) {
-                    // token matches the partial match, but the match is not full,
-                    // we temporarily save the current token then we try to match the next one.
-                    Some(MatchType::Partial(partial)) => partial,
-                    // partial match is now full, we keep this matches and we advance positions
-                    Some(MatchType::Full { ids, .. }) => {
-                        // save the token that closes the partial match as a match.
-                        matches.push(Match {
-                            char_count: word.char_end - *first_word_char_start,
-                            ids: ids.clone().collect(),
-                            position: MatchPosition::Phrase {
-                                word_positions: [first_word_position, word_position],
-                                token_positions: [first_token_position, token_position],
-                            },
-                        });
-
-                        // the match is complete, we return true.
-                        return true;
-                    }
-                    // no match, continue to next match.
-                    None => break,
-                };
-            }
-
-            // the match is not complete, we return false.
-            false
+impl Matcher<'_, '_, '_, '_> {
+    /// Computes the match bounds of the text (early `None` if it is empty), caching tokens and matches.
+    pub fn get_match_bounds(
+        &mut self,
+        // TODO: Add an option to count UTF-16 code units, which is what JS uses when slicing strings
+        // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String#utf-16_characters_unicode_code_points_and_grapheme_clusters
+        // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/slice
+        format_options: Option<FormatOptions>,
+    ) -> Option<MatchBounds> {
+        if self.text.is_empty() {
+            return None;
         }
 
-        let tokens: Vec<_> =
-            self.tokenizer.tokenize_with_allow_list(self.text, self.locales).collect();
-        let mut matches = Vec::new();
+        let ((matches, query_positions), tokens) =
+            self.tokens_matches_and_query_positions.get_or_insert_with(|| {
+                let tokens = self
+                    .tokenizer
+                    .tokenize_with_allow_list(self.text, self.locales)
+                    .collect::<Vec<_>>();
 
-        let mut words_positions = tokens
-            .iter()
-            .scan((0, 0), |(token_position, word_position), token| {
-                let current_token_position = *token_position;
-                let current_word_position = *word_position;
-                *token_position += 1;
-                if !token.is_separator() {
-                    *word_position += 1;
-                }
+                (self.matching_words.get_matches_and_query_positions(&tokens, self.text), tokens)
+            });
 
-                Some((current_token_position, current_word_position, token))
-            })
-            .filter(|(_, _, token)| !token.is_separator());
-
-        while let Some((token_position, word_position, word)) = words_positions.next() {
-            for match_type in self.matching_words.match_token(word) {
-                match match_type {
-                    // we match, we save the current token as a match,
-                    // then we continue the rest of the tokens.
-                    MatchType::Full { ids, char_count, .. } => {
-                        let ids: Vec<_> = ids.clone().collect();
-                        matches.push(Match {
-                            char_count,
-                            ids,
-                            position: MatchPosition::Word { word_position, token_position },
-                        });
-                        break;
-                    }
-                    // we match partially, iterate over next tokens to check if we can complete the match.
-                    MatchType::Partial(partial) => {
-                        // if match is completed, we break the matching loop over the current token,
-                        // then we continue the rest of the tokens.
-                        let mut wp = words_positions.clone();
-                        if compute_partial_match(
-                            partial,
-                            token_position,
-                            word_position,
-                            &word.char_start,
-                            &mut wp,
-                            &mut matches,
-                        ) {
-                            words_positions = wp;
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-
-        self.matches = Some((tokens, matches));
-        self
+        MatchBounds::try_new(tokens, matches, query_positions, format_options.unwrap_or_default())
     }
 
-    /// Returns boundaries of the words that match the query.
-    pub fn matches(&mut self, array_indices: &[usize]) -> Vec<MatchBounds> {
-        match &self.matches {
-            None => self.compute_matches().matches(array_indices),
-            Some((tokens, matches)) => matches
-                .iter()
-                .map(|m| MatchBounds {
-                    start: tokens[m.get_first_token_pos()].byte_start,
-                    length: self.calc_byte_length(tokens, m),
-                    indices: if array_indices.is_empty() {
-                        None
-                    } else {
-                        Some(array_indices.to_owned())
-                    },
-                })
-                .collect(),
+    pub fn get_formatted_text(&mut self, format_options: Option<FormatOptions>) -> Option<String> {
+        let MatchBounds { mut highlight_toggle, ref indices } =
+            self.get_match_bounds(format_options)?;
+
+        let MarkerOptions { highlight_pre_tag, highlight_post_tag, crop_marker } =
+            &self.marker_options;
+
+        let mut formatted_text = Vec::new();
+
+        let mut indices_iter = indices.iter();
+        let mut previous_index = indices_iter.next().expect("TODO");
+
+        // push crop marker if it's not the start of the text
+        if !crop_marker.is_empty() && *previous_index != 0 {
+            formatted_text.push(crop_marker.as_str());
         }
-    }
 
-    fn calc_byte_length(&self, tokens: &[Token<'t>], m: &Match) -> usize {
-        (m.get_first_token_pos()..=m.get_last_token_pos())
-            .flat_map(|i| match &tokens[i].char_map {
-                Some(char_map) => {
-                    char_map.iter().map(|(original, _)| *original as usize).collect_vec()
-                }
-                None => tokens[i].lemma().chars().map(|c| c.len_utf8()).collect_vec(),
-            })
-            .take(m.char_count)
-            .sum()
-    }
-
-    /// Returns the bounds in byte index of the crop window.
-    fn crop_bounds(&self, tokens: &[Token<'_>], matches: &[Match], crop_size: usize) -> [usize; 2] {
-        let (
-            mut remaining_words,
-            is_iterating_forward,
-            before_tokens_starting_index,
-            after_tokens_starting_index,
-        ) = if !matches.is_empty() {
-            let [matches_first, matches_last] =
-                best_match_interval::find_best_match_interval(matches, crop_size);
-
-            let matches_size =
-                matches_last.get_last_word_pos() - matches_first.get_first_word_pos() + 1;
-
-            let is_crop_size_gte_match_size = crop_size >= matches_size;
-            let is_iterating_forward = matches_size == 0 || is_crop_size_gte_match_size;
-
-            let remaining_words = if is_crop_size_gte_match_size {
-                crop_size - matches_size
-            } else {
-                // in case matches size is greater than crop size, which implies there's only one match,
-                // we count words backwards, because we have to remove words, as they're extra words outside of
-                // crop window
-                matches_size - crop_size
-            };
-
-            let after_tokens_starting_index = if matches_size == 0 {
-                0
-            } else {
-                let last_match_last_token_position_plus_one = matches_last.get_last_token_pos() + 1;
-                if last_match_last_token_position_plus_one < tokens.len() {
-                    last_match_last_token_position_plus_one
-                } else {
-                    // we have matched the end of possible tokens, there's nothing to advance
-                    tokens.len()
-                }
-            };
-
-            (
-                remaining_words,
-                is_iterating_forward,
-                if is_iterating_forward { matches_first.get_first_token_pos() } else { 0 },
-                after_tokens_starting_index,
-            )
-        } else {
-            (crop_size, true, 0, 0)
-        };
-
-        // create the initial state of the crop window: 2 iterators starting from the matches positions,
-        // a reverse iterator starting from the first match token position and going towards the beginning of the text,
-        let mut before_tokens = tokens[..before_tokens_starting_index].iter().rev().peekable();
-        // an iterator ...
-        let mut after_tokens = if is_iterating_forward {
-            // ... starting from the last match token position and going towards the end of the text.
-            Either::Left(tokens[after_tokens_starting_index..].iter().peekable())
-        } else {
-            // ... starting from the last match token position and going towards the start of the text.
-            Either::Right(tokens[..=after_tokens_starting_index].iter().rev().peekable())
-        };
-
-        // grows the crop window peeking in both directions
-        // until the window contains the good number of words:
-        while remaining_words > 0 {
-            let before_token_kind = before_tokens.peek().map(SimpleTokenKind::new);
-            let after_token_kind =
-                after_tokens.as_mut().either(|v| v.peek(), |v| v.peek()).map(SimpleTokenKind::new);
-
-            match (before_token_kind, after_token_kind) {
-                // we can expand both sides.
-                (Some(before_token_kind), Some(after_token_kind)) => {
-                    match (before_token_kind, after_token_kind) {
-                        // if they are both separators and are the same kind then advance both,
-                        // or expand in the soft separator separator side.
-                        (
-                            SimpleTokenKind::Separator(before_token_separator_kind),
-                            SimpleTokenKind::Separator(after_token_separator_kind),
-                        ) => {
-                            if before_token_separator_kind == after_token_separator_kind {
-                                before_tokens.next();
-
-                                // this avoid having an ending separator before crop marker.
-                                if remaining_words > 1 {
-                                    after_tokens.next();
-                                }
-                            } else if matches!(before_token_separator_kind, SeparatorKind::Hard) {
-                                after_tokens.next();
-                            } else {
-                                before_tokens.next();
-                            }
-                        }
-                        // if one of the tokens is a word, we expend in the side of the word.
-                        // left is a word, advance left.
-                        (SimpleTokenKind::NotSeparator, SimpleTokenKind::Separator(_)) => {
-                            before_tokens.next();
-                            remaining_words -= 1;
-                        }
-                        // right is a word, advance right.
-                        (SimpleTokenKind::Separator(_), SimpleTokenKind::NotSeparator) => {
-                            after_tokens.next();
-                            remaining_words -= 1;
-                        }
-                        // both are words, advance left then right if remaining_word > 0.
-                        (SimpleTokenKind::NotSeparator, SimpleTokenKind::NotSeparator) => {
-                            before_tokens.next();
-                            remaining_words -= 1;
-
-                            if remaining_words > 0 {
-                                after_tokens.next();
-                                remaining_words -= 1;
-                            }
-                        }
-                    }
-                }
-                // the end of the text is reached, advance left.
-                (Some(before_token_kind), None) => {
-                    before_tokens.next();
-                    if matches!(before_token_kind, SimpleTokenKind::NotSeparator) {
-                        remaining_words -= 1;
-                    }
-                }
-                // the start of the text is reached, advance right.
-                (None, Some(after_token_kind)) => {
-                    after_tokens.next();
-                    if matches!(after_token_kind, SimpleTokenKind::NotSeparator) {
-                        remaining_words -= 1;
-                    }
-                }
-                // no more token to add.
-                (None, None) => break,
+        for index in indices_iter {
+            if highlight_toggle {
+                formatted_text.push(highlight_pre_tag.as_str());
             }
-        }
 
-        // finally, keep the byte index of each bound of the crop window.
-        let crop_byte_start = before_tokens.next().map_or(0, |t| t.byte_end);
-        let crop_byte_end = after_tokens.next().map_or(self.text.len(), |t| t.byte_start);
+            formatted_text.push(&self.text[*previous_index..*index]);
 
-        [crop_byte_start, crop_byte_end]
-    }
-
-    // Returns the formatted version of the original text.
-    pub fn format(&mut self, format_options: FormatOptions) -> Cow<'t, str> {
-        if !format_options.highlight && format_options.crop.is_none() {
-            // compute matches is not needed if no highlight nor crop is requested.
-            Cow::Borrowed(self.text)
-        } else {
-            match &self.matches {
-                Some((tokens, matches)) => {
-                    // If the text has to be cropped, crop around the best interval.
-                    let [crop_byte_start, crop_byte_end] = match format_options.crop {
-                        Some(crop_size) if crop_size > 0 => {
-                            self.crop_bounds(tokens, matches, crop_size)
-                        }
-                        _ => [0, self.text.len()],
-                    };
-
-                    let mut formatted = Vec::new();
-
-                    // push crop marker if it's not the start of the text.
-                    if crop_byte_start > 0 && !self.crop_marker.is_empty() {
-                        formatted.push(self.crop_marker);
-                    }
-
-                    let mut byte_index = crop_byte_start;
-
-                    if format_options.highlight {
-                        // insert highlight markers around matches.
-                        for m in matches {
-                            let [m_byte_start, m_byte_end] = match m.position {
-                                MatchPosition::Word { token_position, .. } => {
-                                    let token = &tokens[token_position];
-                                    [&token.byte_start, &token.byte_end]
-                                }
-                                MatchPosition::Phrase { token_positions: [ftp, ltp], .. } => {
-                                    [&tokens[ftp].byte_start, &tokens[ltp].byte_end]
-                                }
-                            };
-
-                            // skip matches out of the crop window
-                            if *m_byte_end < crop_byte_start || *m_byte_start > crop_byte_end {
-                                continue;
-                            }
-
-                            // adjust start and end to the crop window size
-                            let [m_byte_start, m_byte_end] = [
-                                max(m_byte_start, &crop_byte_start),
-                                min(m_byte_end, &crop_byte_end),
-                            ];
-
-                            // push text that is positioned before our matches
-                            if byte_index < *m_byte_start {
-                                formatted.push(&self.text[byte_index..*m_byte_start]);
-                            }
-
-                            formatted.push(self.highlight_prefix);
-
-                            // TODO: This is additional work done, charabia::token::Token byte_len
-                            // should already get us the original byte length, however, that doesn't work as
-                            // it's supposed to, investigate why
-                            let highlight_byte_index = self.text[*m_byte_start..]
-                                .char_indices()
-                                .nth(m.char_count)
-                                .map_or(*m_byte_end, |(i, _)| min(i + *m_byte_start, *m_byte_end));
-                            formatted.push(&self.text[*m_byte_start..highlight_byte_index]);
-
-                            formatted.push(self.highlight_suffix);
-
-                            // if it's a prefix highlight, we put the end of the word after the highlight marker.
-                            if highlight_byte_index < *m_byte_end {
-                                formatted.push(&self.text[highlight_byte_index..*m_byte_end]);
-                            }
-
-                            byte_index = *m_byte_end;
-                        }
-                    }
-
-                    // push the rest of the text between last match and the end of crop.
-                    if byte_index < crop_byte_end {
-                        formatted.push(&self.text[byte_index..crop_byte_end]);
-                    }
-
-                    // push crop marker if it's not the end of the text.
-                    if crop_byte_end < self.text.len() && !self.crop_marker.is_empty() {
-                        formatted.push(self.crop_marker);
-                    }
-
-                    if formatted.len() == 1 {
-                        // avoid concatenating if there is already 1 slice.
-                        Cow::Borrowed(&self.text[crop_byte_start..crop_byte_end])
-                    } else {
-                        Cow::Owned(formatted.concat())
-                    }
-                }
-                None => self.compute_matches().format(format_options),
+            if highlight_toggle {
+                formatted_text.push(highlight_post_tag.as_str());
             }
+
+            highlight_toggle = !highlight_toggle;
+            previous_index = index;
         }
+
+        // push crop marker if it's not the end of the text
+        if !crop_marker.is_empty() && *previous_index < self.text.len() {
+            formatted_text.push(crop_marker.as_str());
+        }
+
+        if formatted_text.len() == 1 {
+            // avoid concatenating if there is only one element
+            return Some(formatted_text[0].to_string());
+        }
+
+        Some(formatted_text.concat())
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use charabia::TokenizerBuilder;
-    use matching_words::tests::temp_index_with_documents;
-
     use super::*;
     use crate::index::tests::TempIndex;
     use crate::{execute_search, filtered_universe, SearchContext, TimeBudget};
+    use charabia::TokenizerBuilder;
 
     impl<'a> MatcherBuilder<'a> {
         fn new_test(rtxn: &'a heed::RoTxn<'a>, index: &'a TempIndex, query: &str) -> Self {
@@ -526,423 +180,321 @@ mod tests {
             .unwrap();
 
             // consume context and located_query_terms to build MatchingWords.
-            let matching_words = match located_query_terms {
-                Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms),
-                None => MatchingWords::default(),
-            };
+            let matching_words = located_query_terms
+                .map(|located_query_terms| MatchingWords::new(ctx, &located_query_terms))
+                .unwrap_or_default();
 
-            MatcherBuilder::new(matching_words, TokenizerBuilder::default().into_tokenizer())
+            MatcherBuilder::new(
+                matching_words,
+                TokenizerBuilder::default().into_tokenizer(),
+                MarkerOptions {
+                    highlight_pre_tag: "<em>".to_string(),
+                    highlight_post_tag: "</em>".to_string(),
+                    crop_marker: "…".to_string(),
+                },
+            )
         }
     }
 
-    #[test]
-    fn format_identity() {
-        let temp_index = temp_index_with_documents();
-        let rtxn = temp_index.read_txn().unwrap();
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
-
-        let format_options = FormatOptions { highlight: false, crop: None };
-
-        // Text without any match.
-        let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
-        let mut matcher = builder.build(text, None);
-        // no crop and no highlight should return complete text.
-        assert_eq!(&matcher.format(format_options), &text);
-
-        // Text containing all matches.
-        let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
-        let mut matcher = builder.build(text, None);
-        // no crop and no highlight should return complete text.
-        assert_eq!(&matcher.format(format_options), &text);
-
-        // Text containing some matches.
-        let text = "Natalie risk her future to build a world with the boy she loves.";
-        let mut matcher = builder.build(text, None);
-        // no crop and no highlight should return complete text.
-        assert_eq!(&matcher.format(format_options), &text);
-    }
-
-    #[test]
-    fn format_highlight() {
-        let temp_index = temp_index_with_documents();
-        let rtxn = temp_index.read_txn().unwrap();
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
-
-        let format_options = FormatOptions { highlight: true, crop: None };
-
-        // empty text.
-        let text = "";
-        let mut matcher = builder.build(text, None);
-        assert_eq!(&matcher.format(format_options), "");
-
-        // text containing only separators.
-        let text = ":-)";
-        let mut matcher = builder.build(text, None);
-        assert_eq!(&matcher.format(format_options), ":-)");
-
-        // Text without any match.
-        let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
-        let mut matcher = builder.build(text, None);
-        // no crop should return complete text, because there is no matches.
-        assert_eq!(&matcher.format(format_options), &text);
-
-        // Text containing all matches.
-        let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
-        let mut matcher = builder.build(text, None);
-        // no crop should return complete text with highlighted matches.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."
-        );
-
-        // Text containing some matches.
-        let text = "Natalie risk her future to build a world with the boy she loves.";
-        let mut matcher = builder.build(text, None);
-        // no crop should return complete text with highlighted matches.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves."
-        );
-    }
-
-    #[test]
-    fn highlight_unicode() {
-        let temp_index = temp_index_with_documents();
-        let rtxn = temp_index.read_txn().unwrap();
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "world");
-        let format_options = FormatOptions { highlight: true, crop: None };
-
-        // Text containing prefix match.
-        let text = "Ŵôřlḑôle";
-        let mut matcher = builder.build(text, None);
-        // no crop should return complete text with highlighted matches.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"<em>Ŵôřlḑ</em>ôle"
-        );
-
-        // Text containing unicode match.
-        let text = "Ŵôřlḑ";
-        let mut matcher = builder.build(text, None);
-        // no crop should return complete text with highlighted matches.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"<em>Ŵôřlḑ</em>"
-        );
-
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "westfali");
-        let format_options = FormatOptions { highlight: true, crop: None };
-
-        // Text containing unicode match.
-        let text = "Westfália";
-        let mut matcher = builder.build(text, None);
-        // no crop should return complete text with highlighted matches.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"<em>Westfáli</em>a"
-        );
-    }
-
-    #[test]
-    fn format_crop() {
-        let temp_index = temp_index_with_documents();
-        let rtxn = temp_index.read_txn().unwrap();
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
-
-        let format_options = FormatOptions { highlight: false, crop: Some(10) };
-
-        // empty text.
-        let text = "";
-        let mut matcher = builder.build(text, None);
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @""
-        );
-
-        // text containing only separators.
-        let text = ":-)";
-        let mut matcher = builder.build(text, None);
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @":-)"
-        );
-
-        // Text without any match.
-        let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
-        let mut matcher = builder.build(text, None);
-        // no highlight should return 10 first words with a marker at the end.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"A quick brown fox can not jump 32 feet, right…"
-        );
-
-        // Text without any match starting by a separator.
-        let text = "(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)";
-        let mut matcher = builder.build(text, None);
-        // no highlight should return 10 first words with a marker at the end.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"(A quick brown fox can not jump 32 feet, right…"
-        );
-
-        // Test phrase propagation
-        let text = "Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.";
-        let mut matcher = builder.build(text, None);
-        // should crop the phrase instead of croping around the match.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…Split The World is a book written by Emily Henry…"
-        );
-
-        // Text containing some matches.
-        let text = "Natalie risk her future to build a world with the boy she loves.";
-        let mut matcher = builder.build(text, None);
-        // no highlight should return 10 last words with a marker at the start.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…future to build a world with the boy she loves…"
-        );
-
-        // Text containing all matches.
-        let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
-        let mut matcher = builder.build(text, None);
-        // no highlight should return 10 last words with a marker at the start.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…she loves. Emily Henry: The Love That Split The World."
-        );
-
-        // Text containing a match unordered and a match ordered.
-        let text = "The world split void void void void void void void void void split the world void void";
-        let mut matcher = builder.build(text, None);
-        // crop should return 10 last words with a marker at the start.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…void void void void void split the world void void"
-        );
-
-        // Text containing matches with different density.
-        let text = "split void the void void world void void void void void void void void void void split the world void void";
-        let mut matcher = builder.build(text, None);
-        // crop should return 10 last words with a marker at the start.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…void void void void void split the world void void"
-        );
-
-        // Text containing matches with same word.
-        let text = "split split split split split split void void void void void void void void void void split the world void void";
-        let mut matcher = builder.build(text, None);
-        // crop should return 10 last words with a marker at the start.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…void void void void void split the world void void"
-        );
-    }
-
-    #[test]
-    fn format_highlight_crop() {
-        let temp_index = temp_index_with_documents();
-        let rtxn = temp_index.read_txn().unwrap();
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
-
-        let format_options = FormatOptions { highlight: true, crop: Some(10) };
-
-        // empty text.
-        let text = "";
-        let mut matcher = builder.build(text, None);
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @""
-        );
-
-        // text containing only separators.
-        let text = ":-)";
-        let mut matcher = builder.build(text, None);
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @":-)"
-        );
-
-        // Text without any match.
-        let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
-        let mut matcher = builder.build(text, None);
-        // both should return 10 first words with a marker at the end.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"A quick brown fox can not jump 32 feet, right…"
-        );
-
-        // Text containing some matches.
-        let text = "Natalie risk her future to build a world with the boy she loves.";
-        let mut matcher = builder.build(text, None);
-        // both should return 10 last words with a marker at the start and highlighted matches.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…future to build a <em>world</em> with <em>the</em> boy she loves…"
-        );
-
-        // Text containing all matches.
-        let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
-        let mut matcher = builder.build(text, None);
-        // both should return 10 last words with a marker at the start and highlighted matches.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."
-        );
-
-        // Text containing a match unordered and a match ordered.
-        let text = "The world split void void void void void void void void void split the world void void";
-        let mut matcher = builder.build(text, None);
-        // crop should return 10 last words with a marker at the start.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…void void void void void <em>split</em> <em>the</em> <em>world</em> void void"
-        );
-    }
-
-    #[test]
-    fn format_highlight_crop_phrase_query() {
-        //! testing: https://github.com/meilisearch/meilisearch/issues/3975
+    pub fn rename_me(
+        format_options: Option<FormatOptions>,
+        text: &str,
+        query: &str,
+        expected_maybe_text: Option<&str>,
+    ) {
         let temp_index = TempIndex::new();
 
-        let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
+        // Index a document whose `text` field is the exact string passed to `builder.build` below.
+        // TODO: Confirm that the formatted text always equals the indexed text in practice.
         temp_index
             .add_documents(documents!([
-                { "id": 1, "text": text }
+                { "id": 1, "text": text.to_string() },
             ]))
             .unwrap();
 
         let rtxn = temp_index.read_txn().unwrap();
-
-        let format_options = FormatOptions { highlight: true, crop: Some(10) };
-
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
+        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, query);
         let mut matcher = builder.build(text, None);
-        // should return 10 words with a marker at the start as well the end, and the highlighted matches.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…the power to split <em>the world</em> between those who embraced…"
-        );
 
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\"");
-        let mut matcher = builder.build(text, None);
-        // should highlight "those" and the phrase "and those".
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…world between <em>those</em> who embraced progress <em>and those</em> who resisted…"
+        assert_eq!(
+            matcher.get_formatted_text(format_options),
+            expected_maybe_text.map(|v| v.to_string())
         );
+    }
 
-        let builder = MatcherBuilder::new_test(
-            &rtxn,
-            &temp_index,
-            "\"The groundbreaking invention had the power to split the world\"",
-        );
-        let mut matcher = builder.build(text, None);
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"<em>The groundbreaking invention had the power to split the world</em>…"
-        );
+    struct FormatVariations<'a> {
+        highlight_with_crop: Option<&'a str>,
+        highlight: Option<&'a str>,
+        crop: Option<&'a str>,
+    }
 
-        let builder = MatcherBuilder::new_test(
-            &rtxn,
-            &temp_index,
-            "\"The groundbreaking invention had the power to split the world between those\"",
-        );
-        let mut matcher = builder.build(text, None);
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"<em>The groundbreaking invention had the power to split the world</em>…"
-        );
+    impl<'a> FormatVariations<'a> {
+        fn get(&self) -> [(Option<FormatOptions>, Option<&'a str>); 5] {
+            [
+                (None, None),
+                (Some(FormatOptions { highlight: true, crop: Some(2) }), self.highlight_with_crop),
+                (Some(FormatOptions { highlight: true, crop: None }), self.highlight),
+                (Some(FormatOptions { highlight: false, crop: Some(2) }), self.crop),
+                (Some(FormatOptions { highlight: false, crop: None }), None),
+            ]
+        }
+    }
 
-        let builder = MatcherBuilder::new_test(
-            &rtxn,
-            &temp_index,
-            "\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"",
-        );
-        let mut matcher = builder.build(text, None);
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            // TODO: Should include exclamation mark without crop markers
-            @"…between those who <em>embraced progress and those who resisted change</em>…"
-        );
-
-        let builder = MatcherBuilder::new_test(
-            &rtxn,
-            &temp_index,
-            "\"groundbreaking invention\" \"split the world between\"",
-        );
-        let mut matcher = builder.build(text, None);
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…<em>groundbreaking invention</em> had the power to <em>split the world between</em>…"
-        );
-
-        let builder = MatcherBuilder::new_test(
-            &rtxn,
-            &temp_index,
-            "\"groundbreaking invention\" \"had the power to split the world between those\"",
-        );
-        let mut matcher = builder.build(text, None);
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…<em>invention</em> <em>had the power to split the world between those</em>…"
+    /// Calls `rename_me` with the fixed base text
+    /// "Dei store fiskane eta dei små — dei liger under som minst förmå."
+    /// (Men are like fish; the great ones devour the small.)
+    fn rename_me_with_base_text(
+        format_options: Option<FormatOptions>,
+        query: &str,
+        expected_maybe_text: Option<&str>,
+    ) {
+        rename_me(
+            format_options,
+            "Dei store fiskane eta dei små — dei liger under som minst förmå.",
+            query,
+            expected_maybe_text,
         );
     }
 
     #[test]
-    fn smaller_crop_size() {
-        //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
-        let temp_index = temp_index_with_documents();
-        let rtxn = temp_index.read_txn().unwrap();
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
+    fn empty_query() {
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("Dei store…"),
+            highlight: None,
+            crop: Some("Dei store…"),
+        }
+        .get())
+        {
+            rename_me_with_base_text(format_options, "", expected_maybe_text);
+        }
+    }
 
-        let text = "void void split the world void void.";
+    #[test]
+    fn only_separators() {
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some(":-…"),
+            highlight: None,
+            crop: Some(":-…"),
+        }
+        .get())
+        {
+            rename_me(format_options, ":-)", ":-)", expected_maybe_text);
+        }
+    }
 
-        // set a smaller crop size
-        let format_options = FormatOptions { highlight: false, crop: Some(2) };
-        let mut matcher = builder.build(text, None);
-        // because crop size < query size, partially format matches.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…split the…"
-        );
+    #[test]
+    fn highlight_end() {
+        // TODO: Why is "förmå" marked as prefix in located matching words?
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("…<em>minst</em> <em>förmå</em>."),
+            highlight: Some("Dei store fiskane eta dei små — dei liger under som <em>minst</em> <em>förmå</em>."),
+            crop: Some("…minst förmå."),
+        }
+        .get()) {
+            rename_me_with_base_text(format_options, "minst förmå", expected_maybe_text);
+        }
+    }
 
-        // set a smaller crop size
-        let format_options = FormatOptions { highlight: false, crop: Some(1) };
-        let mut matcher = builder.build(text, None);
-        // because crop size < query size, partially format matches.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…split…"
-        );
+    #[test]
+    fn highlight_beginning_and_middle() {
+        // TODO: Why is "store" marked as prefix in located matching words?
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("<em>Dei</em> <em>store</em>…"),
+            highlight: Some("<em>Dei</em> <em>store</em> fiskane eta <em>dei</em> små — <em>dei</em> liger under som minst förmå."),
+            crop: Some("Dei store…"),
+        }
+        .get()) {
+            rename_me_with_base_text(format_options, "Dei store", expected_maybe_text);
+        }
+    }
 
-        // set  crop size to 0
-        let format_options = FormatOptions { highlight: false, crop: Some(0) };
-        let mut matcher = builder.build(text, None);
-        // because crop size is 0, crop is ignored.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"void void split the world void void."
+    #[test]
+    fn partial_match_middle() {
+        // TODO: Is this intentional? With only "forma" interned, the whole word gets highlighted.
+        // The searched prefix word "fo" cannot be found inside "forma" within
+        // `milli::search::new::matches::matching_words::MatchingWords::try_get_word_match`;
+        // `milli::search::new::query_term::QueryTerm::all_computed_derivations` might be at fault here.
+
+        // interned words = ["forma"]
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("…<em>förmå</em>, på…"),
+            highlight: Some("altså, <em>förmå</em>, på en måte"),
+            crop: Some("…förmå, på…"),
+        }
+        .get())
+        {
+            rename_me(format_options, "altså, förmå, på en måte", "fo", expected_maybe_text);
+        }
+
+        // interned words = ["fo", "forma"]
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("…<em>fo</em> <em>fö</em>rmå…"),
+            highlight: Some("altså, <em>fo</em> <em>fö</em>rmå, på en måte"),
+            crop: Some("…fo förmå…"),
+        }
+        .get())
+        {
+            rename_me(format_options, "altså, fo förmå, på en måte", "fo", expected_maybe_text);
+        }
+    }
+
+    #[test]
+    fn partial_match_end() {
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("<em>förmå</em>, på…"),
+            highlight: Some("<em>förmå</em>, på en måte"),
+            crop: Some("förmå, på…"),
+        }
+        .get())
+        {
+            rename_me(format_options, "förmå, på en måte", "fo", expected_maybe_text);
+        }
+
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("<em>fo</em> <em>fö</em>rmå…"),
+            highlight: Some("<em>fo</em> <em>fö</em>rmå, på en måte"),
+            crop: Some("fo förmå…"),
+        }
+        .get())
+        {
+            rename_me(format_options, "fo förmå, på en måte", "fo", expected_maybe_text);
+        }
+    }
+
+    #[test]
+    fn partial_match_beginning() {
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("altså, <em>förmå</em>"),
+            highlight: Some("altså, <em>förmå</em>"),
+            crop: Some("altså, förmå"),
+        }
+        .get())
+        {
+            rename_me(format_options, "altså, förmå", "fo", expected_maybe_text);
+        }
+
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("…<em>fo</em> <em>fö</em>rmå"),
+            highlight: Some("altså, <em>fo</em> <em>fö</em>rmå"),
+            crop: Some("…fo förmå"),
+        }
+        .get())
+        {
+            rename_me(format_options, "altså, fo förmå", "fo", expected_maybe_text);
+        }
+    }
+
+    #[test]
+    fn separator_at_end() {
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("…<em>minst</em> förmå. , ;"),
+            highlight: Some("; , — dei liger under som <em>minst</em> förmå. , ;"),
+            crop: Some("…minst förmå. , ;"),
+        }
+        .get())
+        {
+            rename_me(
+                format_options,
+                "; , — dei liger under som minst förmå. , ;",
+                "minst",
+                expected_maybe_text,
+            );
+        }
+    }
+
+    #[test]
+    fn separator_at_beginning() {
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("; , — <em>dei</em> liger…"),
+            highlight: Some("; , — <em>dei</em> liger under som minst förmå. , ;"),
+            crop: Some("; , — dei liger…"),
+        }
+        .get())
+        {
+            rename_me(
+                format_options,
+                "; , — dei liger under som minst förmå. , ;",
+                "dei",
+                expected_maybe_text,
+            );
+        }
+    }
+
+    #[test]
+    fn phrase() {
+        for (format_options, expected_maybe_text) in (FormatVariations {
+            highlight_with_crop: Some("…<em>dei liger</em>…"),
+            highlight: Some(
+                "Dei store fiskane eta dei små — <em>dei liger</em> under som minst förmå.",
+            ),
+            crop: Some("…dei liger…"),
+        }
+        .get())
+        {
+            rename_me_with_base_text(format_options, "\"dei liger\"", expected_maybe_text);
+        }
+    }
+
+    #[test]
+    fn phrase_highlight_bigger_than_crop() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: true, crop: Some(1) }),
+            "\"dei liger\"",
+            Some("…<em>dei</em>…"),
         );
     }
 
     #[test]
-    fn partial_matches() {
-        let temp_index = temp_index_with_documents();
-        let rtxn = temp_index.read_txn().unwrap();
-        let mut builder =
-            MatcherBuilder::new_test(&rtxn, &temp_index, "the \"t he\" door \"do or\"");
-        builder.highlight_prefix("_".to_string());
-        builder.highlight_suffix("_".to_string());
+    fn phrase_bigger_than_crop() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: false, crop: Some(1) }),
+            "\"dei liger\"",
+            Some("…dei…"),
+        );
+    }
 
-        let format_options = FormatOptions { highlight: true, crop: None };
+    #[test]
+    fn phrase_highlight_crop_middle() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: true, crop: Some(4) }),
+            "\"dei liger\"",
+            Some("…små — <em>dei liger</em> under…"),
+        );
+    }
 
-        let text = "the do or die can't be he do and or isn't he";
-        let mut matcher = builder.build(text, None);
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"_the_ _do or_ die can't be he do and or isn'_t he_"
+    #[test]
+    fn phrase_crop_middle() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: false, crop: Some(4) }),
+            "\"dei liger\"",
+            Some("…små — dei liger under…"),
+        );
+    }
+
+    #[test]
+    fn phrase_highlight_crop_end() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: true, crop: Some(4) }),
+            "\"minst förmå\"",
+            Some("…under som <em>minst förmå</em>."),
+        );
+    }
+
+    #[test]
+    fn phrase_crop_end() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: false, crop: Some(4) }),
+            "\"minst förmå\"",
+            Some("…under som minst förmå."),
+        );
+    }
+
+    #[test]
+    fn phrase_highlight_crop_beginning() {
+        rename_me_with_base_text(
+            Some(FormatOptions { highlight: true, crop: Some(4) }),
+            "\"Dei store\"",
+            Some("<em>Dei store</em> fiskane eta…"),
         );
     }
 }
diff --git a/crates/milli/src/search/new/matches/simple_token_kind.rs b/crates/milli/src/search/new/matches/simple_token_kind.rs
deleted file mode 100644
index b34a8c985..000000000
--- a/crates/milli/src/search/new/matches/simple_token_kind.rs
+++ /dev/null
@@ -1,15 +0,0 @@
-use charabia::{SeparatorKind, Token, TokenKind};
-
-pub enum SimpleTokenKind {
-    Separator(SeparatorKind),
-    NotSeparator,
-}
-
-impl SimpleTokenKind {
-    pub fn new(token: &&Token<'_>) -> Self {
-        match token.kind {
-            TokenKind::Separator(separaor_kind) => Self::Separator(separaor_kind),
-            _ => Self::NotSeparator,
-        }
-    }
-}
diff --git a/crates/milli/src/search/new/query_term/mod.rs b/crates/milli/src/search/new/query_term/mod.rs
index ba8964e34..748248fc3 100644
--- a/crates/milli/src/search/new/query_term/mod.rs
+++ b/crates/milli/src/search/new/query_term/mod.rs
@@ -489,8 +489,7 @@ impl QueryTerm {
         let mut words = BTreeSet::new();
         let mut phrases = BTreeSet::new();
 
-        let ZeroTypoTerm { phrase, exact: zero_typo, prefix_of, synonyms, use_prefix_db: _ } =
-            &self.zero_typo;
+        let ZeroTypoTerm { phrase, exact: zero_typo, prefix_of, synonyms, .. } = &self.zero_typo;
         words.extend(zero_typo.iter().copied());
         words.extend(prefix_of.iter().copied());
         phrases.extend(phrase.iter().copied());