Add sort ranking rule to new search impl

This commit is contained in:
Loïc Lecrenier 2023-02-28 12:42:29 +01:00
parent 600e3dd1c5
commit 71f18e4379
4 changed files with 31 additions and 23 deletions

View File

@@ -68,7 +68,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
fn initial_universe(&mut self, universe: &RoaringBitmap) { fn initial_universe(&mut self, universe: &RoaringBitmap) {
self.initial_universe = Some(universe.clone()); self.initial_universe = Some(universe.clone());
} }
fn ranking_rules(&mut self, rr: &[Box<dyn RankingRule<QueryGraph>>]) { fn ranking_rules(&mut self, rr: &[&mut dyn RankingRule<QueryGraph>]) {
self.ranking_rules_ids = Some(rr.iter().map(|rr| rr.id()).collect()); self.ranking_rules_ids = Some(rr.iter().map(|rr| rr.id()).collect());
} }

View File

@@ -17,7 +17,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
fn initial_universe(&mut self, _universe: &RoaringBitmap) {} fn initial_universe(&mut self, _universe: &RoaringBitmap) {}
fn ranking_rules(&mut self, _rr: &[Box<dyn RankingRule<Q>>]) {} fn ranking_rules(&mut self, _rr: &[&mut dyn RankingRule<Q>]) {}
fn start_iteration_ranking_rule<'transaction>( fn start_iteration_ranking_rule<'transaction>(
&mut self, &mut self,
_ranking_rule_idx: usize, _ranking_rule_idx: usize,
@@ -67,7 +67,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
fn initial_query(&mut self, query: &Q); fn initial_query(&mut self, query: &Q);
fn initial_universe(&mut self, universe: &RoaringBitmap); fn initial_universe(&mut self, universe: &RoaringBitmap);
fn ranking_rules(&mut self, rr: &[Box<dyn RankingRule<Q>>]); fn ranking_rules(&mut self, rr: &[&mut dyn RankingRule<Q>]);
fn start_iteration_ranking_rule<'transaction>( fn start_iteration_ranking_rule<'transaction>(
&mut self, &mut self,

View File

@@ -8,7 +8,7 @@ use super::QueryGraph;
use crate::new::graph_based_ranking_rule::GraphBasedRankingRule; use crate::new::graph_based_ranking_rule::GraphBasedRankingRule;
use crate::new::ranking_rule_graph::proximity::ProximityGraph; use crate::new::ranking_rule_graph::proximity::ProximityGraph;
use crate::new::words::Words; use crate::new::words::Words;
// use crate::search::new::sort::Sort; use crate::search::new::sort::Sort;
use crate::{Filter, Index, Result, TermsMatchingStrategy}; use crate::{Filter, Index, Result, TermsMatchingStrategy};
pub trait RankingRuleOutputIter<'transaction, Query> { pub trait RankingRuleOutputIter<'transaction, Query> {
@@ -122,12 +122,12 @@ pub fn execute_search<'transaction>(
length: usize, length: usize,
logger: &mut dyn SearchLogger<QueryGraph>, logger: &mut dyn SearchLogger<QueryGraph>,
) -> Result<Vec<u32>> { ) -> Result<Vec<u32>> {
let words = Words::new(TermsMatchingStrategy::Last); let words = &mut Words::new(TermsMatchingStrategy::Last);
// let sort = Sort::new(index, txn, "sort1".to_owned(), true)?; let sort = &mut Sort::new(index, txn, "release_date".to_owned(), true)?;
let proximity = GraphBasedRankingRule::<ProximityGraph>::new("proximity".to_owned()); let proximity = &mut GraphBasedRankingRule::<ProximityGraph>::new("proximity".to_owned());
// TODO: ranking rules given as argument // TODO: ranking rules given as argument
let mut ranking_rules: Vec<Box<dyn RankingRule<'transaction, QueryGraph>>> = let mut ranking_rules: Vec<&mut dyn RankingRule<'transaction, QueryGraph>> =
vec![Box::new(words), Box::new(proximity) /* Box::new(sort) */]; vec![words, proximity, sort];
logger.ranking_rules(&ranking_rules); logger.ranking_rules(&ranking_rules);
@@ -142,7 +142,7 @@ pub fn execute_search<'transaction>(
} }
let ranking_rules_len = ranking_rules.len(); let ranking_rules_len = ranking_rules.len();
logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query_graph, &universe); logger.start_iteration_ranking_rule(0, ranking_rules[0], query_graph, &universe);
ranking_rules[0].start_iteration(index, txn, db_cache, logger, &universe, query_graph)?; ranking_rules[0].start_iteration(index, txn, db_cache, logger, &universe, query_graph)?;
let mut candidates = vec![RoaringBitmap::default(); ranking_rules_len]; let mut candidates = vec![RoaringBitmap::default(); ranking_rules_len];
@@ -152,9 +152,10 @@ pub fn execute_search<'transaction>(
macro_rules! back { macro_rules! back {
() => { () => {
assert!(candidates[cur_ranking_rule_index].is_empty());
logger.end_iteration_ranking_rule( logger.end_iteration_ranking_rule(
cur_ranking_rule_index, cur_ranking_rule_index,
ranking_rules[cur_ranking_rule_index].as_ref(), ranking_rules[cur_ranking_rule_index],
&candidates[cur_ranking_rule_index], &candidates[cur_ranking_rule_index],
); );
candidates[cur_ranking_rule_index].clear(); candidates[cur_ranking_rule_index].clear();
@@ -182,7 +183,7 @@ pub fn execute_search<'transaction>(
if cur_offset + (candidates.len() as usize) < from { if cur_offset + (candidates.len() as usize) < from {
logger.skip_bucket_ranking_rule( logger.skip_bucket_ranking_rule(
cur_ranking_rule_index, cur_ranking_rule_index,
ranking_rules[cur_ranking_rule_index].as_ref(), ranking_rules[cur_ranking_rule_index],
&candidates, &candidates,
); );
} else { } else {
@@ -191,7 +192,7 @@ pub fn execute_search<'transaction>(
all_candidates.split_at(from - cur_offset); all_candidates.split_at(from - cur_offset);
logger.skip_bucket_ranking_rule( logger.skip_bucket_ranking_rule(
cur_ranking_rule_index, cur_ranking_rule_index,
ranking_rules[cur_ranking_rule_index].as_ref(), ranking_rules[cur_ranking_rule_index],
&skipped_candidates.into_iter().collect(), &skipped_candidates.into_iter().collect(),
); );
let candidates = candidates let candidates = candidates
@@ -216,6 +217,7 @@ pub fn execute_search<'transaction>(
// The universe for this bucket is zero or one element, so we don't need to sort // The universe for this bucket is zero or one element, so we don't need to sort
// anything, just extend the results and go back to the parent ranking rule. // anything, just extend the results and go back to the parent ranking rule.
if candidates[cur_ranking_rule_index].len() <= 1 { if candidates[cur_ranking_rule_index].len() <= 1 {
candidates[cur_ranking_rule_index].clear();
maybe_add_to_results!(&candidates[cur_ranking_rule_index]); maybe_add_to_results!(&candidates[cur_ranking_rule_index]);
back!(); back!();
continue; continue;
@@ -223,7 +225,7 @@ pub fn execute_search<'transaction>(
logger.next_bucket_ranking_rule( logger.next_bucket_ranking_rule(
cur_ranking_rule_index, cur_ranking_rule_index,
ranking_rules[cur_ranking_rule_index].as_ref(), ranking_rules[cur_ranking_rule_index],
&candidates[cur_ranking_rule_index], &candidates[cur_ranking_rule_index],
); );
@@ -232,6 +234,7 @@ pub fn execute_search<'transaction>(
continue; continue;
}; };
assert!(candidates[cur_ranking_rule_index].is_superset(&next_bucket.candidates));
candidates[cur_ranking_rule_index] -= &next_bucket.candidates; candidates[cur_ranking_rule_index] -= &next_bucket.candidates;
if cur_ranking_rule_index == ranking_rules_len - 1 if cur_ranking_rule_index == ranking_rules_len - 1
@@ -246,7 +249,7 @@ pub fn execute_search<'transaction>(
candidates[cur_ranking_rule_index] = next_bucket.candidates.clone(); candidates[cur_ranking_rule_index] = next_bucket.candidates.clone();
logger.start_iteration_ranking_rule( logger.start_iteration_ranking_rule(
cur_ranking_rule_index, cur_ranking_rule_index,
ranking_rules[cur_ranking_rule_index].as_ref(), ranking_rules[cur_ranking_rule_index],
&next_bucket.query, &next_bucket.query,
&candidates[cur_ranking_rule_index], &candidates[cur_ranking_rule_index],
); );

View File

@@ -16,19 +16,16 @@ use crate::{
Result, Result,
}; };
// TODO: The implementation of Sort is not correct:
// (1) it should not return documents it has already returned (does the current implementation have the same bug?)
// (2) at the end, it should return all the remaining documents (this could be ensured at the trait level?)
pub struct Sort<'transaction, Query> { pub struct Sort<'transaction, Query> {
field_name: String, field_name: String,
field_id: Option<FieldId>, field_id: Option<FieldId>,
is_ascending: bool, is_ascending: bool,
original_query: Option<Query>,
iter: Option<RankingRuleOutputIterWrapper<'transaction, Query>>, iter: Option<RankingRuleOutputIterWrapper<'transaction, Query>>,
} }
impl<'transaction, Query> Sort<'transaction, Query> { impl<'transaction, Query> Sort<'transaction, Query> {
pub fn new( pub fn new(
index: &'transaction Index, index: &Index,
rtxn: &'transaction heed::RoTxn, rtxn: &'transaction heed::RoTxn,
field_name: String, field_name: String,
is_ascending: bool, is_ascending: bool,
@@ -36,7 +33,7 @@ impl<'transaction, Query> Sort<'transaction, Query> {
let fields_ids_map = index.fields_ids_map(rtxn)?; let fields_ids_map = index.fields_ids_map(rtxn)?;
let field_id = fields_ids_map.id(&field_name); let field_id = fields_ids_map.id(&field_name);
Ok(Self { field_name, field_id, is_ascending, iter: None }) Ok(Self { field_name, field_id, is_ascending, original_query: None, iter: None })
} }
} }
@@ -87,6 +84,7 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query
} }
None => RankingRuleOutputIterWrapper::new(Box::new(std::iter::empty())), None => RankingRuleOutputIterWrapper::new(Box::new(std::iter::empty())),
}; };
self.original_query = Some(parent_query_graph.clone());
self.iter = Some(iter); self.iter = Some(iter);
Ok(()) Ok(())
} }
@@ -97,11 +95,17 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query
_txn: &'transaction RoTxn, _txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>, _db_cache: &mut DatabaseCache<'transaction>,
_logger: &mut dyn SearchLogger<Query>, _logger: &mut dyn SearchLogger<Query>,
_universe: &RoaringBitmap, universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<Query>>> { ) -> Result<Option<RankingRuleOutput<Query>>> {
let iter = self.iter.as_mut().unwrap(); let iter = self.iter.as_mut().unwrap();
// TODO: we should make use of the universe in the function below // TODO: we should make use of the universe in the function below
iter.next_bucket() if let Some(mut bucket) = iter.next_bucket()? {
bucket.candidates &= universe;
Ok(Some(bucket))
} else {
let query = self.original_query.as_ref().unwrap().clone();
Ok(Some(RankingRuleOutput { query, candidates: universe.clone() }))
}
} }
fn end_iteration( fn end_iteration(
@@ -111,6 +115,7 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query
_db_cache: &mut DatabaseCache<'transaction>, _db_cache: &mut DatabaseCache<'transaction>,
_logger: &mut dyn SearchLogger<Query>, _logger: &mut dyn SearchLogger<Query>,
) { ) {
self.original_query = None;
self.iter = None; self.iter = None;
} }
} }