Use new term matching strategy logic in words ranking rule

This commit is contained in:
Loïc Lecrenier 2023-03-30 13:15:43 +02:00
parent 2997d1f186
commit 35c16ad047

View File

@ -1,18 +1,16 @@
use std::collections::BTreeSet;
use roaring::RoaringBitmap;
use super::logger::SearchLogger; use super::logger::SearchLogger;
use super::query_graph::QueryNodeData; use super::query_graph::QueryNode;
use super::resolve_query_graph::compute_query_graph_docids; use super::resolve_query_graph::compute_query_graph_docids;
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
use crate::{Result, TermsMatchingStrategy}; use crate::{Result, TermsMatchingStrategy};
use roaring::RoaringBitmap;
pub struct Words { pub struct Words {
exhausted: bool, // TODO: remove exhausted: bool, // TODO: remove
query_graph: Option<QueryGraph>, query_graph: Option<QueryGraph>,
iterating: bool, // TODO: remove iterating: bool, // TODO: remove
positions_to_remove: Vec<i8>, nodes_to_remove: Vec<SmallBitmap<QueryNode>>,
terms_matching_strategy: TermsMatchingStrategy, terms_matching_strategy: TermsMatchingStrategy,
} }
impl Words { impl Words {
@ -21,7 +19,7 @@ impl Words {
exhausted: true, exhausted: true,
query_graph: None, query_graph: None,
iterating: false, iterating: false,
positions_to_remove: vec![], nodes_to_remove: vec![],
terms_matching_strategy, terms_matching_strategy,
} }
} }
@ -40,26 +38,14 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words {
) -> Result<()> { ) -> Result<()> {
self.exhausted = false; self.exhausted = false;
self.query_graph = Some(parent_query_graph.clone()); self.query_graph = Some(parent_query_graph.clone());
self.nodes_to_remove = match self.terms_matching_strategy {
let positions_to_remove = match self.terms_matching_strategy {
TermsMatchingStrategy::Last => { TermsMatchingStrategy::Last => {
let mut all_positions = BTreeSet::new(); parent_query_graph.removal_order_for_terms_matching_strategy_last()
for (_, n) in parent_query_graph.nodes.iter() {
match &n.data {
QueryNodeData::Term(term) => {
all_positions.extend(term.positions.clone());
} }
QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => {} TermsMatchingStrategy::All => {
vec![]
} }
}
let mut r: Vec<i8> = all_positions.into_iter().collect();
// don't remove the first term
r.remove(0);
r
}
TermsMatchingStrategy::All => vec![],
}; };
self.positions_to_remove = positions_to_remove;
self.iterating = true; self.iterating = true;
Ok(()) Ok(())
} }
@ -83,18 +69,12 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words {
let this_bucket = compute_query_graph_docids(ctx, query_graph, universe)?; let this_bucket = compute_query_graph_docids(ctx, query_graph, universe)?;
let child_query_graph = query_graph.clone(); let child_query_graph = query_graph.clone();
loop {
if self.positions_to_remove.is_empty() { if self.nodes_to_remove.is_empty() {
self.exhausted = true; self.exhausted = true;
break;
} else { } else {
let position_to_remove = self.positions_to_remove.pop().unwrap(); let nodes_to_remove = self.nodes_to_remove.pop().unwrap();
let did_delete_any_node = query_graph.remove_nodes(&nodes_to_remove.iter().collect::<Vec<_>>());
query_graph.remove_words_starting_at_position(position_to_remove);
if did_delete_any_node {
break;
}
}
} }
Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket })) Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket }))
@ -107,7 +87,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words {
) { ) {
self.iterating = false; self.iterating = false;
self.exhausted = true; self.exhausted = true;
self.positions_to_remove = vec![]; self.nodes_to_remove = vec![];
self.query_graph = None; self.query_graph = None;
} }
} }