2023-03-08 09:55:53 +01:00
use roaring ::RoaringBitmap ;
2023-02-22 15:34:37 +01:00
use super ::logger ::SearchLogger ;
2023-03-09 11:12:31 +01:00
use super ::{ RankingRule , RankingRuleOutput , RankingRuleQueryTrait , SearchContext } ;
2023-03-20 09:30:10 +01:00
use crate ::heed_codec ::facet ::FacetGroupKeyCodec ;
use crate ::heed_codec ::ByteSliceRefCodec ;
use crate ::search ::facet ::{ ascending_facet_sort , descending_facet_sort } ;
use crate ::{ FieldId , Index , Result } ;
2023-03-09 11:12:31 +01:00
2023-03-13 14:03:48 +01:00
pub trait RankingRuleOutputIter < ' ctx , Query > {
2023-03-09 11:12:31 +01:00
fn next_bucket ( & mut self ) -> Result < Option < RankingRuleOutput < Query > > > ;
}
2023-03-13 14:03:48 +01:00
pub struct RankingRuleOutputIterWrapper < ' ctx , Query > {
iter : Box < dyn Iterator < Item = Result < RankingRuleOutput < Query > > > + ' ctx > ,
2023-03-09 11:12:31 +01:00
}
2023-03-13 14:03:48 +01:00
impl < ' ctx , Query > RankingRuleOutputIterWrapper < ' ctx , Query > {
pub fn new ( iter : Box < dyn Iterator < Item = Result < RankingRuleOutput < Query > > > + ' ctx > ) -> Self {
2023-03-09 11:12:31 +01:00
Self { iter }
}
}
2023-03-13 14:03:48 +01:00
impl < ' ctx , Query > RankingRuleOutputIter < ' ctx , Query > for RankingRuleOutputIterWrapper < ' ctx , Query > {
2023-03-09 11:12:31 +01:00
fn next_bucket ( & mut self ) -> Result < Option < RankingRuleOutput < Query > > > {
match self . iter . next ( ) {
Some ( x ) = > x . map ( Some ) ,
None = > Ok ( None ) ,
}
}
}
2023-03-27 11:04:04 +02:00
// `Query` type parameter: the same as the type parameter given to `bucket_sort`.
// It implements the `RankingRuleQueryTrait` trait and is either a query graph or a placeholder search.
// The sort ranking rule doesn't need the query parameter: it does the same thing
// whether we're doing a query graph search or a placeholder search.
//
// The query is stored anyway because every ranking rule must return a query from `next_bucket`.
// ---
// "Mismatch" between the new and the old implementations:
// - old implementation: takes a roaring bitmap as input, and the ranking rule iterates over all the buckets
// - new implementation: still works like that, but it shouldn't, because the universe may change
//   on every call to `next_bucket`, for two reasons:
//   1. elements that were already returned by the ranking rule are subtracted from the universe;
//      the old implementation did this too (they were subtracted from the candidates)
//   2. NEW in the new implementation: the distinct rule might have been applied between calls to `next_bucket`
// The new implementation ignores the documents removed in (2), which is a missed performance
// optimization; see `next_bucket`.
// This performance problem is P2.
// It mostly happens when many documents map to the same distinct attribute value.
2023-03-13 14:03:48 +01:00
pub struct Sort < ' ctx , Query > {
2023-02-22 15:34:37 +01:00
field_name : String ,
2023-02-21 09:49:43 +01:00
field_id : Option < FieldId > ,
is_ascending : bool ,
2023-02-28 12:42:29 +01:00
original_query : Option < Query > ,
2023-03-13 14:03:48 +01:00
iter : Option < RankingRuleOutputIterWrapper < ' ctx , Query > > ,
2023-02-21 09:49:43 +01:00
}
2023-03-13 14:03:48 +01:00
impl < ' ctx , Query > Sort < ' ctx , Query > {
2023-03-28 12:40:52 +02:00
pub fn new (
2023-02-28 12:42:29 +01:00
index : & Index ,
2023-03-13 14:03:48 +01:00
rtxn : & ' ctx heed ::RoTxn ,
2023-02-21 09:49:43 +01:00
field_name : String ,
is_ascending : bool ,
) -> Result < Self > {
let fields_ids_map = index . fields_ids_map ( rtxn ) ? ;
let field_id = fields_ids_map . id ( & field_name ) ;
2023-02-28 12:42:29 +01:00
Ok ( Self { field_name , field_id , is_ascending , original_query : None , iter : None } )
2023-02-21 09:49:43 +01:00
}
}
2023-03-13 14:03:48 +01:00
impl < ' ctx , Query : RankingRuleQueryTrait > RankingRule < ' ctx , Query > for Sort < ' ctx , Query > {
2023-02-22 15:34:37 +01:00
fn id ( & self ) -> String {
let Self { field_name , is_ascending , .. } = self ;
format! ( " {field_name} : {} " , if * is_ascending { " asc " } else { " desc " } )
}
2023-02-21 09:49:43 +01:00
fn start_iteration (
& mut self ,
2023-03-13 14:03:48 +01:00
ctx : & mut SearchContext < ' ctx > ,
2023-02-22 15:34:37 +01:00
_logger : & mut dyn SearchLogger < Query > ,
2023-02-21 09:49:43 +01:00
parent_candidates : & RoaringBitmap ,
2023-03-20 09:30:10 +01:00
parent_query : & Query ,
2023-02-21 09:49:43 +01:00
) -> Result < ( ) > {
let iter : RankingRuleOutputIterWrapper < Query > = match self . field_id {
Some ( field_id ) = > {
2023-03-20 09:26:11 +01:00
let number_db = ctx
. index
. facet_id_f64_docids
. remap_key_type ::< FacetGroupKeyCodec < ByteSliceRefCodec > > ( ) ;
let string_db = ctx
. index
. facet_id_string_docids
. remap_key_type ::< FacetGroupKeyCodec < ByteSliceRefCodec > > ( ) ;
2023-02-21 09:49:43 +01:00
2023-03-20 09:26:11 +01:00
let ( number_iter , string_iter ) = if self . is_ascending {
let number_iter = ascending_facet_sort (
ctx . txn ,
number_db ,
field_id ,
parent_candidates . clone ( ) ,
) ? ;
let string_iter = ascending_facet_sort (
ctx . txn ,
string_db ,
field_id ,
parent_candidates . clone ( ) ,
) ? ;
( itertools ::Either ::Left ( number_iter ) , itertools ::Either ::Left ( string_iter ) )
} else {
let number_iter = descending_facet_sort (
ctx . txn ,
number_db ,
field_id ,
parent_candidates . clone ( ) ,
) ? ;
let string_iter = descending_facet_sort (
ctx . txn ,
string_db ,
field_id ,
parent_candidates . clone ( ) ,
) ? ;
( itertools ::Either ::Right ( number_iter ) , itertools ::Either ::Right ( string_iter ) )
} ;
2023-02-21 09:49:43 +01:00
2023-03-20 09:30:10 +01:00
let query_graph = parent_query . clone ( ) ;
2023-02-21 09:49:43 +01:00
RankingRuleOutputIterWrapper ::new ( Box ::new ( number_iter . chain ( string_iter ) . map (
2023-03-20 09:26:11 +01:00
move | r | {
let ( docids , _ ) = r ? ;
Ok ( RankingRuleOutput { query : query_graph . clone ( ) , candidates : docids } )
2023-02-21 09:49:43 +01:00
} ,
) ) )
}
None = > RankingRuleOutputIterWrapper ::new ( Box ::new ( std ::iter ::empty ( ) ) ) ,
} ;
2023-03-20 09:30:10 +01:00
self . original_query = Some ( parent_query . clone ( ) ) ;
2023-02-21 09:49:43 +01:00
self . iter = Some ( iter ) ;
Ok ( ( ) )
}
fn next_bucket (
& mut self ,
2023-03-13 14:03:48 +01:00
_ctx : & mut SearchContext < ' ctx > ,
2023-02-22 15:34:37 +01:00
_logger : & mut dyn SearchLogger < Query > ,
2023-02-28 12:42:29 +01:00
universe : & RoaringBitmap ,
2023-02-21 09:49:43 +01:00
) -> Result < Option < RankingRuleOutput < Query > > > {
let iter = self . iter . as_mut ( ) . unwrap ( ) ;
// TODO: we should make use of the universe in the function below
2023-03-27 11:04:04 +02:00
// good for correctness, but ideally iter.next_bucket would take the current universe into account,
// as right now it could return buckets that don't intersect with the universe, meaning we will make many
// unneeded calls.
2023-02-28 12:42:29 +01:00
if let Some ( mut bucket ) = iter . next_bucket ( ) ? {
bucket . candidates & = universe ;
Ok ( Some ( bucket ) )
} else {
let query = self . original_query . as_ref ( ) . unwrap ( ) . clone ( ) ;
Ok ( Some ( RankingRuleOutput { query , candidates : universe . clone ( ) } ) )
}
2023-02-21 09:49:43 +01:00
}
fn end_iteration (
& mut self ,
2023-03-13 14:03:48 +01:00
_ctx : & mut SearchContext < ' ctx > ,
2023-02-22 15:34:37 +01:00
_logger : & mut dyn SearchLogger < Query > ,
2023-02-21 09:49:43 +01:00
) {
2023-02-28 12:42:29 +01:00
self . original_query = None ;
2023-02-21 09:49:43 +01:00
self . iter = None ;
}
}