From 37a9d64c4441bb6a4a199ad018ab4ddb44d4d958 Mon Sep 17 00:00:00 2001
From: "F. Levi" <55688616+flevi29@users.noreply.github.com>
Date: Tue, 1 Oct 2024 22:52:01 +0300
Subject: [PATCH] Fix failing test, refactor

---
 milli/src/search/new/matches/mod.rs | 44 ++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs
index 1552de8aa..ae1264482 100644
--- a/milli/src/search/new/matches/mod.rs
+++ b/milli/src/search/new/matches/mod.rs
@@ -245,8 +245,7 @@ struct MatchIntervalWithScore {
 impl MatchIntervalWithScore {
     /// Returns the matches interval where the score computed by match_interval_score is the best.
     fn find_best_match_interval(matches: &[Match], crop_size: usize) -> &[Match] {
-        let matches_len = matches.len();
-        if matches_len <= 1 {
+        if matches.len() <= 1 {
             return matches;
         }
 
@@ -303,7 +302,7 @@ impl MatchIntervalWithScore {
         }
 
         // compute the last interval score and compare it to the best one.
-        let interval_last = matches_len - 1;
+        let interval_last = matches.len() - 1;
         // if it's the last match with itself, we need to make sure it's
         // not a phrase longer than the crop window
         if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size {
@@ -451,28 +450,39 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
         crop_size: usize,
     ) -> (usize, usize) {
         // if there is no match, we start from the beginning of the string by default.
-        let first_match_first_word_position =
-            matches.first().map(|m| m.get_first_word_pos()).unwrap_or(0);
-        let first_match_first_token_position =
-            matches.first().map(|m| m.get_first_token_pos()).unwrap_or(0);
-        let last_match_last_word_position =
-            matches.last().map(|m| m.get_last_word_pos()).unwrap_or(0);
-        let last_match_last_token_position =
-            matches.last().map(|m| m.get_last_token_pos()).unwrap_or(0);
+        let (matches_size, first_match_first_token_position, last_match_last_token_position) =
+            if !matches.is_empty() {
+                let matches_first = matches.first().unwrap();
+                let matches_last = matches.last().unwrap();
 
-        let matches_window_len =
-            last_match_last_word_position - first_match_first_word_position + 1;
+                (
+                    matches_last.get_last_word_pos() - matches_first.get_first_word_pos() + 1,
+                    matches_first.get_first_token_pos(),
+                    matches_last.get_last_token_pos(),
+                )
+            } else {
+                (0, 0, 0)
+            };
 
-        if crop_size >= matches_window_len {
+        if crop_size >= matches_size {
             // matches needs to be counted in the crop len.
-            let mut remaining_words = crop_size - matches_window_len;
+            let mut remaining_words = crop_size - matches_size;
+
+            let last_match_last_token_position_plus_one = last_match_last_token_position + 1;
+            let after_tokens_starting_index = if matches_size == 0 {
+                0
+            } else if last_match_last_token_position_plus_one < tokens.len() {
+                last_match_last_token_position_plus_one
+            } else {
+                tokens.len()
+            };
 
             // create the initial state of the crop window: 2 iterators starting from the matches positions,
             // a reverse iterator starting from the first match token position and going towards the beginning of the text,
             let mut before_tokens =
                 tokens[..first_match_first_token_position].iter().rev().peekable();
             // an iterator starting from the last match token position and going towards the end of the text.
-            let mut after_tokens = tokens[last_match_last_token_position + 1..].iter().peekable();
+            let mut after_tokens = tokens[after_tokens_starting_index..].iter().peekable();
 
             // grows the crop window peeking in both directions
             // until the window contains the good number of words:
@@ -553,7 +563,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
             (crop_byte_start, crop_byte_end)
         } else {
             // there's one match and it's longer than the crop window, so we have to advance inward
-            let mut remaining_extra_words = matches_window_len - crop_size;
+            let mut remaining_extra_words = matches_size - crop_size;
             let mut tokens_from_end =
                 tokens[..=last_match_last_token_position].iter().rev().peekable();
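
Note (illustration only, not part of the patch): the behavioural fix is in how the start of the `after_tokens` iterator is chosen. The old code defaulted `last_match_last_token_position` to 0 when there were no matches, so the `tokens[last_match_last_token_position + 1..]` slice silently skipped the first token. Below is a minimal, self-contained Rust sketch of the new starting-index logic; `after_tokens_start` is a hypothetical stand-alone helper (the patch inlines this logic directly in the cropping method), and the patch's `else if`/`else` pair is condensed here into `.min()`, which computes the same value.

// Sketch of the `after_tokens_starting_index` computation added by the patch.
// `after_tokens_start` is a hypothetical helper, not a milli API.
fn after_tokens_start(
    matches_size: usize,
    last_match_last_token_position: usize,
    tokens_len: usize,
) -> usize {
    if matches_size == 0 {
        // No match: grow the crop window from the very first token, instead of
        // slicing at `last_match_last_token_position + 1` and skipping token 0.
        0
    } else {
        // Equivalent to the patch's `else if`/`else` pair: resume right after
        // the last match, but never slice past the end of `tokens`.
        (last_match_last_token_position + 1).min(tokens_len)
    }
}

fn main() {
    // No matches: the `after` iterator now covers the whole text
    // (the old `unwrap_or(0) + 1` slice started at token 1 here).
    assert_eq!(after_tokens_start(0, 0, 10), 0);
    // Last match ends mid-text: resume right after it, as before.
    assert_eq!(after_tokens_start(3, 4, 10), 5);
    // Last match ends on the final token: an empty `after` slice.
    assert_eq!(after_tokens_start(2, 9, 10), 10);
}

Each assert mirrors one branch of the new logic: no matches at all, a match ending mid-text, and a match ending on the final token.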