diff --git a/crates/milli/src/search/new/matches/mod.rs b/crates/milli/src/search/new/matches/mod.rs
index 80a19948e..d9009d92b 100644
--- a/crates/milli/src/search/new/matches/mod.rs
+++ b/crates/milli/src/search/new/matches/mod.rs
@@ -8,6 +8,7 @@ use std::cmp::{max, min};
 
 use charabia::{Language, SeparatorKind, Token, Tokenizer};
 use either::Either;
+use itertools::Itertools;
 pub use matching_words::MatchingWords;
 use matching_words::{MatchType, PartialMatch};
 use r#match::{Match, MatchPosition};
@@ -229,12 +230,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
                 .iter()
                 .map(|m| MatchBounds {
                     start: tokens[m.get_first_token_pos()].byte_start,
-                    length: (m.get_first_token_pos()..m.get_last_token_pos() + 1)
-                        .map(|i| tokens[i].clone())
-                        .flat_map(|token| token.char_map.clone().unwrap_or(vec![(1, 1); token.char_end - token.char_start] /* Some token doesn't have a char map, here we treat them as single byte chars. */))
-                        .map(|(original, _)| original as usize)
-                        .take(m.char_count)
-                        .sum(),
+                    length: self.calc_byte_length(&tokens, m),
                     indices: if array_indices.is_empty() {
                         None
                     } else {
@@ -245,6 +241,18 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
         }
     }
 
+    fn calc_byte_length(&self, tokens: &Vec<Token<'_>>, m: &Match) -> usize {
+        (m.get_first_token_pos()..=m.get_last_token_pos())
+            .flat_map(|i| match &tokens[i].char_map {
+                Some(char_map) => {
+                    char_map.iter().map(|(original, _)| *original as usize).collect_vec()
+                }
+                None => tokens[i].lemma().chars().map(|c| c.len_utf8()).collect_vec(),
+            })
+            .take(m.char_count)
+            .sum()
+    }
+
     /// Returns the bounds in byte index of the crop window.
     fn crop_bounds(&self, tokens: &[Token<'_>], matches: &[Match], crop_size: usize) -> [usize; 2] {
         let (