MeiliSearch/milli/src/search/new/words.rs

96 lines
2.9 KiB
Rust
Raw Normal View History

2023-02-22 15:34:37 +01:00
use super::logger::SearchLogger;
use super::query_graph::QueryNode;
use super::resolve_query_graph::compute_query_graph_docids;
use super::small_bitmap::SmallBitmap;
2023-03-14 16:37:47 +01:00
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
use crate::{Result, TermsMatchingStrategy};
use roaring::RoaringBitmap;
/// State carried by the `words` ranking rule between calls.
pub struct Words {
    /// `true` when no further bucket can be produced; `next_bucket` then returns `Ok(None)`.
    exhausted: bool, // TODO: remove
    /// Working copy of the query graph for the current iteration, `None` outside of one.
    query_graph: Option<QueryGraph>,
    /// `true` between `start_iteration` and `end_iteration`.
    iterating: bool, // TODO: remove
    /// Groups of nodes still to be removed from `query_graph`, consumed from the back.
    nodes_to_remove: Vec<SmallBitmap<QueryNode>>,
    /// Strategy deciding how `nodes_to_remove` is filled when an iteration starts.
    terms_matching_strategy: TermsMatchingStrategy,
}
impl Words {
    /// Builds an idle `Words` rule for the given terms matching strategy.
    ///
    /// The returned value holds no query graph and no pending node removals; it is
    /// flagged as exhausted and not iterating until `start_iteration` is called.
    pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
        let nodes_to_remove = Vec::new();
        Self {
            terms_matching_strategy,
            nodes_to_remove,
            query_graph: None,
            iterating: false,
            exhausted: true,
        }
    }
}
2023-03-13 14:03:48 +01:00
impl<'ctx> RankingRule<'ctx, QueryGraph> for Words {
2023-02-22 15:34:37 +01:00
fn id(&self) -> String {
"words".to_owned()
}
fn start_iteration(
&mut self,
2023-03-13 14:03:48 +01:00
_ctx: &mut SearchContext<'ctx>,
2023-02-28 11:49:24 +01:00
_logger: &mut dyn SearchLogger<QueryGraph>,
_parent_candidates: &RoaringBitmap,
parent_query_graph: &QueryGraph,
) -> Result<()> {
self.exhausted = false;
self.query_graph = Some(parent_query_graph.clone());
self.nodes_to_remove = match self.terms_matching_strategy {
TermsMatchingStrategy::Last => {
2023-03-30 14:01:52 +02:00
let mut ns = parent_query_graph.removal_order_for_terms_matching_strategy_last();
ns.reverse();
ns
}
TermsMatchingStrategy::All => {
vec![]
}
};
self.iterating = true;
Ok(())
}
fn next_bucket(
&mut self,
2023-03-13 14:03:48 +01:00
ctx: &mut SearchContext<'ctx>,
2023-02-22 15:34:37 +01:00
logger: &mut dyn SearchLogger<QueryGraph>,
universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
assert!(self.iterating);
assert!(universe.len() > 1);
if self.exhausted {
return Ok(None);
}
let Some(query_graph) = &mut self.query_graph else { panic!() };
2023-02-21 13:57:34 +01:00
logger.log_words_state(query_graph);
let this_bucket = compute_query_graph_docids(ctx, query_graph, universe)?;
2023-02-21 13:57:34 +01:00
let child_query_graph = query_graph.clone();
if self.nodes_to_remove.is_empty() {
self.exhausted = true;
} else {
let nodes_to_remove = self.nodes_to_remove.pop().unwrap();
query_graph.remove_nodes_keep_edges(&nodes_to_remove.iter().collect::<Vec<_>>());
}
Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket }))
}
fn end_iteration(
&mut self,
2023-03-13 14:03:48 +01:00
_ctx: &mut SearchContext<'ctx>,
2023-02-22 15:34:37 +01:00
_logger: &mut dyn SearchLogger<QueryGraph>,
) {
self.iterating = false;
self.exhausted = true;
self.nodes_to_remove = vec![];
self.query_graph = None;
}
}