Add indices field to _matchesPosition to specify where in an array a match comes from (#5005)

* Remove unreachable code
* Add `indices` field to `MatchBounds`

  For matches inside arrays, this field holds the indices of the array elements that matched. For example, searching for `cat` inside `{ "a": ["dog", "cat", "fox"] }` would return `indices: [1]`. For nested arrays, it contains multiple indices, starting with the index into the outermost array. For matches in fields that are not inside arrays, `indices` is omitted from the serialized output to save space.
2025-07-03 20:07:09 +02:00 · 2024-11-20 01:00:43 +01:00 · 2024-11-20 01:00:43 +01:00 · 057fcb3993
commit 057fcb3993
parent c1d8ee2a8d
4 changed files with 139 additions and 97 deletions
--- a/crates/milli/src/search/new/matches/mod.rs
+++ b/crates/milli/src/search/new/matches/mod.rs
@ -105,6 +105,8 @@ impl FormatOptions {
 pub struct MatchBounds {
    pub start: usize,
    pub length: usize,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub indices: Option<Vec<usize>>,
 }

 /// Structure used to analyze a string, compute words that match,
@ -220,15 +222,20 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
    }

    /// Returns boundaries of the words that match the query.
-    pub fn matches(&mut self) -> Vec<MatchBounds> {
+    pub fn matches(&mut self, array_indices: &[usize]) -> Vec<MatchBounds> {
        match &self.matches {
-            None => self.compute_matches().matches(),
+            None => self.compute_matches().matches(array_indices),
            Some((tokens, matches)) => matches
                .iter()
                .map(|m| MatchBounds {
                    start: tokens[m.get_first_token_pos()].byte_start,
                    // TODO: Why is this in chars, while start is in bytes?
                    length: m.char_count,
+                    indices: if array_indices.is_empty() {
+                        None
+                    } else {
+                        Some(array_indices.to_owned())
+                    },
                })
                .collect(),
        }