From 35c16ad047f2f7ff082ffa627bd25e621698029c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 30 Mar 2023 13:15:43 +0200 Subject: [PATCH] Use new term matching strategy logic in words ranking rule --- milli/src/search/new/words.rs | 54 +++++++++++------------------------ 1 file changed, 17 insertions(+), 37 deletions(-) diff --git a/milli/src/search/new/words.rs b/milli/src/search/new/words.rs index dc798e55d..263e9220c 100644 --- a/milli/src/search/new/words.rs +++ b/milli/src/search/new/words.rs @@ -1,18 +1,16 @@ -use std::collections::BTreeSet; - -use roaring::RoaringBitmap; - use super::logger::SearchLogger; -use super::query_graph::QueryNodeData; +use super::query_graph::QueryNode; use super::resolve_query_graph::compute_query_graph_docids; +use super::small_bitmap::SmallBitmap; use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; use crate::{Result, TermsMatchingStrategy}; +use roaring::RoaringBitmap; pub struct Words { exhausted: bool, // TODO: remove query_graph: Option, iterating: bool, // TODO: remove - positions_to_remove: Vec, + nodes_to_remove: Vec>, terms_matching_strategy: TermsMatchingStrategy, } impl Words { @@ -21,7 +19,7 @@ impl Words { exhausted: true, query_graph: None, iterating: false, - positions_to_remove: vec![], + nodes_to_remove: vec![], terms_matching_strategy, } } @@ -40,26 +38,14 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words { ) -> Result<()> { self.exhausted = false; self.query_graph = Some(parent_query_graph.clone()); - - let positions_to_remove = match self.terms_matching_strategy { + self.nodes_to_remove = match self.terms_matching_strategy { TermsMatchingStrategy::Last => { - let mut all_positions = BTreeSet::new(); - for (_, n) in parent_query_graph.nodes.iter() { - match &n.data { - QueryNodeData::Term(term) => { - all_positions.extend(term.positions.clone()); - } - QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => {} - } - } - let mut r: Vec = all_positions.into_iter().collect(); - // don't remove the first term - r.remove(0); - r + parent_query_graph.removal_order_for_terms_matching_strategy_last() + } + TermsMatchingStrategy::All => { + vec![] } - TermsMatchingStrategy::All => vec![], }; - self.positions_to_remove = positions_to_remove; self.iterating = true; Ok(()) } @@ -83,18 +69,12 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words { let this_bucket = compute_query_graph_docids(ctx, query_graph, universe)?; let child_query_graph = query_graph.clone(); - loop { - if self.positions_to_remove.is_empty() { - self.exhausted = true; - break; - } else { - let position_to_remove = self.positions_to_remove.pop().unwrap(); - let did_delete_any_node = - query_graph.remove_words_starting_at_position(position_to_remove); - if did_delete_any_node { - break; - } - } + + if self.nodes_to_remove.is_empty() { + self.exhausted = true; + } else { + let nodes_to_remove = self.nodes_to_remove.pop().unwrap(); + query_graph.remove_nodes(&nodes_to_remove.iter().collect::>()); } Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket })) @@ -107,7 +87,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words { ) { self.iterating = false; self.exhausted = true; - self.positions_to_remove = vec![]; + self.nodes_to_remove = vec![]; self.query_graph = None; } }