Chores: Enhance smart-crop code comments

2025-06-14 12:01:36 +02:00 · 2022-07-07 16:28:09 +02:00 · 2022-07-07 16:28:09 +02:00 · 5d79617a56
commit 5d79617a56
parent ce90fc628a
1 changed files with 16 additions and 1 deletions
--- a/milli/src/search/matches/mod.rs
+++ b/milli/src/search/matches/mod.rs
@ -11,6 +11,7 @@ const DEFAULT_CROP_MARKER: &'static str = "…";
 const DEFAULT_HIGHLIGHT_PREFIX: &'static str = "<em>";
 const DEFAULT_HIGHLIGHT_SUFFIX: &'static str = "</em>";
 /// Structure used to build a Matcher allowing to customize formating tags.
 pub struct MatcherBuilder<'a, A> {
    matching_words: MatchingWords,
    tokenizer: Tokenizer<'a, A>,
@ -100,6 +101,8 @@ pub struct MatchBounds {
    pub length: usize,
 }
 /// Structure used to analize a string, compute words that match,
 /// and format the source string returning an highlighted and croped sub-string.
 pub struct Matcher<'t, 'm, A> {
    text: &'t str,
    matching_words: &'m MatchingWords,
@ -113,6 +116,8 @@ pub struct Matcher<'t, 'm, A> {
 impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> {
    /// Iterates over tokens and save any of them that matches the query.
    fn compute_matches(&mut self) -> &mut Self {
        /// some word are counted as matches only if there are close together and in the good order,
        /// compute_partial_match peek into next words to validate if the match is complete.
        fn compute_partial_match<'a>(
            mut partial: PartialMatch,
            token_position: usize,
@ -246,9 +251,14 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> {
        // matches needs to be counted in the crop len.
        let mut remaining_words = crop_size + first_match_word_position - last_match_word_position;
        // create the initial state of the crop window: 2 iterators starting from the matches positions,
        // a reverse iterator starting from the first match token position and going trew the beginning of the text,
        let mut before_tokens = tokens[..first_match_token_position].iter().rev().peekable();
        // an iterator starting from the last match token position and going trew the end of the text.
        let mut after_tokens = tokens[last_match_token_position..].iter().peekable();
        // gross the crop window peeking in both drections
        // until the window contains the good number of words:
        while remaining_words > 0 {
            let before_token = before_tokens.peek().map(|t| t.separator_kind());
            let after_token = after_tokens.peek().map(|t| t.separator_kind());
@ -315,6 +325,7 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> {
            }
        }
        // finally, keep the byte index of each bounds of the crop window.
        let crop_byte_start = before_tokens.next().map_or(0, |t| t.byte_end);
        let crop_byte_end = after_tokens.next().map_or(self.text.len(), |t| t.byte_start);
@ -353,7 +364,7 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> {
        (uniq_score, distance_score, order_score)
    }
-    /// Returns the matches interval where the score computed by match_interval_score is maximal.
+    /// Returns the matches interval where the score computed by match_interval_score is the best.
    fn find_best_match_interval<'a>(&self, matches: &'a [Match], crop_size: usize) -> &'a [Match] {
        // we compute the matches interval if we have at least 2 matches.
        if matches.len() > 1 {
@ -408,6 +419,8 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> {
        } else {
            match &self.matches {
                Some((tokens, matches)) => {
                    // If the text have to be croped,
                    // compute the best interval to crop around.
                    let matches = match format_options.crop {
                        Some(crop_size) if crop_size > 0 => {
                            self.find_best_match_interval(matches, crop_size)
@ -415,6 +428,8 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> {
                        _ => matches,
                    };
                    // If the text have to be croped,
                    // crop around the best interval.
                    let (byte_start, byte_end) = match format_options.crop {
                        Some(crop_size) if crop_size > 0 => {
                            self.crop_bounds(tokens, matches, crop_size)