Fix bug in exact_attribute rr implementation

2025-07-15 13:58:36 +02:00 · 2023-04-26 10:40:05 +02:00 · 2023-04-26 10:40:05 +02:00 · 55bad07c16
commit 55bad07c16
parent 3421125a55
2 changed files with 24 additions and 23 deletions
--- a/milli/src/search/new/exact_attribute.rs
+++ b/milli/src/search/new/exact_attribute.rs
@ -91,13 +91,14 @@ impl State {
        universe: &RoaringBitmap,
        query_graph: &QueryGraph,
    ) -> Result<Self> {
-        // An ordered list of the (remaining) query terms, with data extracted from them:
-        // 0. exact subterm. If it doesn't exist, the term is skipped.
-        // 1. start position of the term
-        // 2. id of the term
-        let mut count_all_positions = 0;
+        struct ExactTermInfo {
+            exact_term: ExactTerm,
+            start_position: u16,
+            start_term_id: u8,
+            position_count: usize,
+        }

-        let mut exact_term_position_ids: Vec<(ExactTerm, u16, u8)> =
+        let mut exact_terms: Vec<ExactTermInfo> =
            Vec::with_capacity(query_graph.nodes.len() as usize);
        for (_, node) in query_graph.nodes.iter() {
            match &node.data {
@ -107,34 +108,35 @@ impl State {
                    } else {
                        continue;
                    };
-                    count_all_positions += term.positions.len();
-                    exact_term_position_ids.push((
+                    exact_terms.push(ExactTermInfo {
                        exact_term,
-                        *term.positions.start(),
-                        *term.term_ids.start(),
-                    ))
+                        start_position: *term.positions.start(),
+                        start_term_id: *term.term_ids.start(),
+                        position_count: term.positions.len(),
+                    });
                }
                QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => continue,
            }
        }

-        exact_term_position_ids.sort_by_key(|(_, _, id)| *id);
-        exact_term_position_ids.dedup_by_key(|(_, _, id)| *id);
+        exact_terms.sort_by_key(|x| x.start_term_id);
+        exact_terms.dedup_by_key(|x| x.start_term_id);
+        let count_all_positions = exact_terms.iter().fold(0, |acc, x| acc + x.position_count);

        // bail if there is a "hole" (missing word) in remaining query graph
-        if let Some((_, _, first_id)) = exact_term_position_ids.first() {
-            if *first_id != 0 {
+        if let Some(e) = exact_terms.first() {
+            if e.start_term_id != 0 {
                return Ok(State::Empty(query_graph.clone()));
            }
        } else {
            return Ok(State::Empty(query_graph.clone()));
        }
        let mut previous_id = 0;
-        for (_, _, id) in exact_term_position_ids.iter().copied() {
-            if id < previous_id || id - previous_id > 1 {
+        for e in exact_terms.iter() {
+            if e.start_term_id < previous_id || e.start_term_id - previous_id > 1 {
                return Ok(State::Empty(query_graph.clone()));
            } else {
-                previous_id = id;
+                previous_id = e.start_term_id;
            }
        }

@ -147,10 +149,9 @@ impl State {
        // first check that for each term, there exists some attribute that has this term at the correct position
        //"word-position-docids";
        let mut candidates = universe.clone();
-        let words_positions: Vec<(Vec<_>, _)> = exact_term_position_ids
+        let words_positions: Vec<(Vec<_>, _)> = exact_terms
            .iter()
-            .copied()
-            .map(|(term, position, _)| (term.interned_words(ctx).collect(), position))
+            .map(|e| (e.exact_term.interned_words(ctx).collect(), e.start_position))
            .collect();
        for (words, position) in &words_positions {
            if candidates.is_empty() {
--- a/milli/tests/assets/test_set.ndjson
+++ b/milli/tests/assets/test_set.ndjson
@ -134,7 +134,7 @@
    "typo_rank": 0,
    "proximity_rank": 0,
    "attribute_rank": 1,
-    "exact_rank": 3,
+    "exact_rank": 1,
    "asc_desc_rank": 5,
    "sort_by_rank": 2,
    "geo_rank": 34692,
@ -369,7 +369,7 @@
    "typo_rank": 0,
    "proximity_rank": 0,
    "attribute_rank": 1,
-    "exact_rank": 2,
+    "exact_rank": 0,
    "asc_desc_rank": 2,
    "sort_by_rank": 1,
    "geo_rank": 9339230,