2023-06-15 17:36:40 +02:00
use heed ::BytesDecode ;
2023-03-08 09:55:53 +01:00
use roaring ::RoaringBitmap ;
2023-02-22 15:34:37 +01:00
use super ::logger ::SearchLogger ;
2023-03-09 11:12:31 +01:00
use super ::{ RankingRule , RankingRuleOutput , RankingRuleQueryTrait , SearchContext } ;
2023-06-15 17:36:40 +02:00
use crate ::heed_codec ::facet ::{ FacetGroupKeyCodec , OrderedF64Codec } ;
2023-11-27 11:52:22 +01:00
use crate ::heed_codec ::{ BytesRefCodec , StrRefCodec } ;
2023-06-15 17:36:40 +02:00
use crate ::score_details ::{ self , ScoreDetails } ;
2023-03-20 09:30:10 +01:00
use crate ::search ::facet ::{ ascending_facet_sort , descending_facet_sort } ;
use crate ::{ FieldId , Index , Result } ;
2023-03-09 11:12:31 +01:00
2023-03-13 14:03:48 +01:00
pub trait RankingRuleOutputIter < ' ctx , Query > {
2023-03-09 11:12:31 +01:00
fn next_bucket ( & mut self ) -> Result < Option < RankingRuleOutput < Query > > > ;
}
2023-03-13 14:03:48 +01:00
pub struct RankingRuleOutputIterWrapper < ' ctx , Query > {
iter : Box < dyn Iterator < Item = Result < RankingRuleOutput < Query > > > + ' ctx > ,
2023-03-09 11:12:31 +01:00
}
2023-03-13 14:03:48 +01:00
impl < ' ctx , Query > RankingRuleOutputIterWrapper < ' ctx , Query > {
pub fn new ( iter : Box < dyn Iterator < Item = Result < RankingRuleOutput < Query > > > + ' ctx > ) -> Self {
2023-03-09 11:12:31 +01:00
Self { iter }
}
}
2023-03-13 14:03:48 +01:00
impl < ' ctx , Query > RankingRuleOutputIter < ' ctx , Query > for RankingRuleOutputIterWrapper < ' ctx , Query > {
2023-03-09 11:12:31 +01:00
fn next_bucket ( & mut self ) -> Result < Option < RankingRuleOutput < Query > > > {
match self . iter . next ( ) {
Some ( x ) = > x . map ( Some ) ,
None = > Ok ( None ) ,
}
}
}
2023-03-27 11:04:04 +02:00
// `Query` type parameter: the same as the type parameter to `bucket_sort`.
// It implements the `RankingRuleQueryTrait` trait and is either a query graph or a placeholder search.
// The sort ranking rule doesn't need the query parameter: it does the same thing
// whether we're doing a query graph search or a placeholder search.
//
// The query is stored anyway because every ranking rule must return a query from `next_bucket`.
// ---
// "Mismatch" between the new and old implementations:
// - old impl: takes a roaring bitmap as input, and the ranking rule iterates over all the buckets
// - new impl: still works like that but it shouldn't, because the universe may change for every call to `next_bucket`, itself due to:
//   1. elements that were already returned by the ranking rule are subtracted from the universe, which was also done in the old impl (subtracted from the candidates)
//   2. NEW in the new impl: the distinct rule might have been applied between calls to `next_bucket`
// The new impl ignores the documents removed in (2), which is a missed performance optimization; see `next_bucket`.
// This performance problem is P2;
// it mostly happens when many documents map to the same distinct attribute value.
2023-03-13 14:03:48 +01:00
pub struct Sort < ' ctx , Query > {
2023-02-22 15:34:37 +01:00
field_name : String ,
2023-02-21 09:49:43 +01:00
field_id : Option < FieldId > ,
is_ascending : bool ,
2023-02-28 12:42:29 +01:00
original_query : Option < Query > ,
2023-03-13 14:03:48 +01:00
iter : Option < RankingRuleOutputIterWrapper < ' ctx , Query > > ,
2023-06-15 17:36:40 +02:00
must_redact : bool ,
2023-02-21 09:49:43 +01:00
}
2023-03-13 14:03:48 +01:00
impl < ' ctx , Query > Sort < ' ctx , Query > {
2023-03-28 12:40:52 +02:00
pub fn new (
2023-02-28 12:42:29 +01:00
index : & Index ,
2023-03-13 14:03:48 +01:00
rtxn : & ' ctx heed ::RoTxn ,
2023-02-21 09:49:43 +01:00
field_name : String ,
is_ascending : bool ,
) -> Result < Self > {
let fields_ids_map = index . fields_ids_map ( rtxn ) ? ;
let field_id = fields_ids_map . id ( & field_name ) ;
2023-06-15 17:36:40 +02:00
let must_redact = Self ::must_redact ( index , rtxn , & field_name ) ? ;
2023-02-21 09:49:43 +01:00
2023-06-15 17:36:40 +02:00
Ok ( Self {
field_name ,
field_id ,
is_ascending ,
original_query : None ,
iter : None ,
must_redact ,
} )
}
fn must_redact ( index : & Index , rtxn : & ' ctx heed ::RoTxn , field_name : & str ) -> Result < bool > {
2023-07-03 10:20:28 +02:00
let Some ( displayed_fields ) = index . displayed_fields ( rtxn ) ? else {
return Ok ( false ) ;
} ;
2023-06-15 17:36:40 +02:00
Ok ( ! displayed_fields . iter ( ) . any ( | & field | field = = field_name ) )
2023-02-21 09:49:43 +01:00
}
}
2023-03-13 14:03:48 +01:00
impl < ' ctx , Query : RankingRuleQueryTrait > RankingRule < ' ctx , Query > for Sort < ' ctx , Query > {
2023-02-22 15:34:37 +01:00
fn id ( & self ) -> String {
let Self { field_name , is_ascending , .. } = self ;
2023-06-06 18:21:31 +02:00
format! ( " {field_name} : {} " , if * is_ascending { " asc " } else { " desc " } )
2023-02-22 15:34:37 +01:00
}
2023-02-21 09:49:43 +01:00
fn start_iteration (
& mut self ,
2023-03-13 14:03:48 +01:00
ctx : & mut SearchContext < ' ctx > ,
2023-02-22 15:34:37 +01:00
_logger : & mut dyn SearchLogger < Query > ,
2023-02-21 09:49:43 +01:00
parent_candidates : & RoaringBitmap ,
2023-03-20 09:30:10 +01:00
parent_query : & Query ,
2023-02-21 09:49:43 +01:00
) -> Result < ( ) > {
let iter : RankingRuleOutputIterWrapper < Query > = match self . field_id {
Some ( field_id ) = > {
2023-03-20 09:26:11 +01:00
let number_db = ctx
. index
. facet_id_f64_docids
2023-11-27 11:52:22 +01:00
. remap_key_type ::< FacetGroupKeyCodec < BytesRefCodec > > ( ) ;
2023-03-20 09:26:11 +01:00
let string_db = ctx
. index
. facet_id_string_docids
2023-11-27 11:52:22 +01:00
. remap_key_type ::< FacetGroupKeyCodec < BytesRefCodec > > ( ) ;
2023-02-21 09:49:43 +01:00
2023-03-20 09:26:11 +01:00
let ( number_iter , string_iter ) = if self . is_ascending {
let number_iter = ascending_facet_sort (
ctx . txn ,
number_db ,
field_id ,
parent_candidates . clone ( ) ,
) ? ;
let string_iter = ascending_facet_sort (
ctx . txn ,
string_db ,
field_id ,
parent_candidates . clone ( ) ,
) ? ;
( itertools ::Either ::Left ( number_iter ) , itertools ::Either ::Left ( string_iter ) )
} else {
let number_iter = descending_facet_sort (
ctx . txn ,
number_db ,
field_id ,
parent_candidates . clone ( ) ,
) ? ;
let string_iter = descending_facet_sort (
ctx . txn ,
string_db ,
field_id ,
parent_candidates . clone ( ) ,
) ? ;
( itertools ::Either ::Right ( number_iter ) , itertools ::Either ::Right ( string_iter ) )
} ;
2023-06-15 17:36:40 +02:00
let number_iter = number_iter . map ( | r | -> Result < _ > {
let ( docids , bytes ) = r ? ;
Ok ( (
docids ,
serde_json ::Value ::Number (
serde_json ::Number ::from_f64 (
OrderedF64Codec ::bytes_decode ( bytes ) . expect ( " some number " ) ,
)
. expect ( " too big float " ) ,
) ,
) )
} ) ;
let string_iter = string_iter . map ( | r | -> Result < _ > {
let ( docids , bytes ) = r ? ;
Ok ( (
docids ,
serde_json ::Value ::String (
StrRefCodec ::bytes_decode ( bytes ) . expect ( " some string " ) . to_owned ( ) ,
) ,
) )
} ) ;
2023-02-21 09:49:43 +01:00
2023-03-20 09:30:10 +01:00
let query_graph = parent_query . clone ( ) ;
2023-06-15 17:36:40 +02:00
let ascending = self . is_ascending ;
let field_name = self . field_name . clone ( ) ;
let must_redact = self . must_redact ;
2023-02-21 09:49:43 +01:00
RankingRuleOutputIterWrapper ::new ( Box ::new ( number_iter . chain ( string_iter ) . map (
2023-03-20 09:26:11 +01:00
move | r | {
2023-06-15 17:36:40 +02:00
let ( docids , value ) = r ? ;
Ok ( RankingRuleOutput {
query : query_graph . clone ( ) ,
candidates : docids ,
score : ScoreDetails ::Sort ( score_details ::Sort {
field_name : field_name . clone ( ) ,
ascending ,
redacted : must_redact ,
value ,
} ) ,
} )
2023-02-21 09:49:43 +01:00
} ,
) ) )
}
None = > RankingRuleOutputIterWrapper ::new ( Box ::new ( std ::iter ::empty ( ) ) ) ,
} ;
2023-03-20 09:30:10 +01:00
self . original_query = Some ( parent_query . clone ( ) ) ;
2023-02-21 09:49:43 +01:00
self . iter = Some ( iter ) ;
Ok ( ( ) )
}
fn next_bucket (
& mut self ,
2023-03-13 14:03:48 +01:00
_ctx : & mut SearchContext < ' ctx > ,
2023-02-22 15:34:37 +01:00
_logger : & mut dyn SearchLogger < Query > ,
2023-02-28 12:42:29 +01:00
universe : & RoaringBitmap ,
2023-02-21 09:49:43 +01:00
) -> Result < Option < RankingRuleOutput < Query > > > {
let iter = self . iter . as_mut ( ) . unwrap ( ) ;
2023-02-28 12:42:29 +01:00
if let Some ( mut bucket ) = iter . next_bucket ( ) ? {
bucket . candidates & = universe ;
Ok ( Some ( bucket ) )
} else {
let query = self . original_query . as_ref ( ) . unwrap ( ) . clone ( ) ;
2023-06-15 17:36:40 +02:00
Ok ( Some ( RankingRuleOutput {
query ,
candidates : universe . clone ( ) ,
score : ScoreDetails ::Sort ( score_details ::Sort {
field_name : self . field_name . clone ( ) ,
ascending : self . is_ascending ,
redacted : self . must_redact ,
value : serde_json ::Value ::Null ,
} ) ,
} ) )
2023-02-28 12:42:29 +01:00
}
2023-02-21 09:49:43 +01:00
}
fn end_iteration (
& mut self ,
2023-03-13 14:03:48 +01:00
_ctx : & mut SearchContext < ' ctx > ,
2023-02-22 15:34:37 +01:00
_logger : & mut dyn SearchLogger < Query > ,
2023-02-21 09:49:43 +01:00
) {
2023-02-28 12:42:29 +01:00
self . original_query = None ;
2023-02-21 09:49:43 +01:00
self . iter = None ;
}
}