AscDesc criterion returns documents ordered by numbers then by strings

This commit is contained in:
Kerollmops 2021-08-17 11:40:07 +02:00
parent 110bf6b778
commit e9ada44509
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4

View File

@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult}; use super::{Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::search::facet::FacetNumberIter; use crate::search::facet::{FacetNumberIter, FacetStringIter};
use crate::search::query_tree::Operation; use crate::search::query_tree::Operation;
use crate::{FieldId, Index, Result}; use crate::{FieldId, Index, Result};
@ -20,7 +20,7 @@ pub struct AscDesc<'t> {
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
field_name: String, field_name: String,
field_id: Option<FieldId>, field_id: Option<FieldId>,
ascending: bool, is_ascending: bool,
query_tree: Option<Operation>, query_tree: Option<Operation>,
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>, candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
allowed_candidates: RoaringBitmap, allowed_candidates: RoaringBitmap,
@ -53,12 +53,16 @@ impl<'t> AscDesc<'t> {
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
field_name: String, field_name: String,
ascending: bool, is_ascending: bool,
) -> Result<Self> { ) -> Result<Self> {
let fields_ids_map = index.fields_ids_map(rtxn)?; let fields_ids_map = index.fields_ids_map(rtxn)?;
let field_id = fields_ids_map.id(&field_name); let field_id = fields_ids_map.id(&field_name);
let faceted_candidates = match field_id { let faceted_candidates = match field_id {
Some(field_id) => index.number_faceted_documents_ids(rtxn, field_id)?, Some(field_id) => {
let number_faceted = index.number_faceted_documents_ids(rtxn, field_id)?;
let string_faceted = index.string_faceted_documents_ids(rtxn, field_id)?;
number_faceted | string_faceted
}
None => RoaringBitmap::default(), None => RoaringBitmap::default(),
}; };
@ -67,7 +71,7 @@ impl<'t> AscDesc<'t> {
rtxn, rtxn,
field_name, field_name,
field_id, field_id,
ascending, is_ascending,
query_tree: None, query_tree: None,
candidates: Box::new(std::iter::empty()), candidates: Box::new(std::iter::empty()),
allowed_candidates: RoaringBitmap::new(), allowed_candidates: RoaringBitmap::new(),
@ -87,7 +91,7 @@ impl<'t> Criterion for AscDesc<'t> {
loop { loop {
debug!( debug!(
"Facet {}({}) iteration", "Facet {}({}) iteration",
if self.ascending { "Asc" } else { "Desc" }, if self.is_ascending { "Asc" } else { "Desc" },
self.field_name self.field_name
); );
@ -136,7 +140,7 @@ impl<'t> Criterion for AscDesc<'t> {
self.index, self.index,
self.rtxn, self.rtxn,
field_id, field_id,
self.ascending, self.is_ascending,
candidates & &self.faceted_candidates, candidates & &self.faceted_candidates,
)?, )?,
None => Box::new(std::iter::empty()), None => Box::new(std::iter::empty()),
@ -167,31 +171,49 @@ fn facet_ordered<'t>(
index: &'t Index, index: &'t Index,
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
field_id: FieldId, field_id: FieldId,
ascending: bool, is_ascending: bool,
candidates: RoaringBitmap, candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> { ) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
if candidates.len() <= CANDIDATES_THRESHOLD { if candidates.len() <= CANDIDATES_THRESHOLD {
let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?; let number_iter = iterative_facet_number_ordered_iter(
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>) index,
rtxn,
field_id,
is_ascending,
candidates.clone(),
)?;
let string_iter =
iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?;
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
} else { } else {
let facet_fn = if ascending { let facet_number_fn = if is_ascending {
FacetNumberIter::new_reducing FacetNumberIter::new_reducing
} else { } else {
FacetNumberIter::new_reverse_reducing FacetNumberIter::new_reverse_reducing
}; };
let iter = facet_fn(rtxn, index, field_id, candidates)?; let number_iter = facet_number_fn(rtxn, index, field_id, candidates.clone())?
Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids)))) .map(|res| res.map(|(_, docids)| docids));
let facet_string_fn = if is_ascending {
FacetStringIter::new_reducing
} else {
FacetStringIter::new_reverse_reducing
};
let string_iter = facet_string_fn(rtxn, index, field_id, candidates)?
.map(|res| res.map(|(_, _, docids)| docids));
Ok(Box::new(number_iter.chain(string_iter)))
} }
} }
/// Fetch the whole list of candidates facet values one by one and order them by it. /// Fetch the whole list of candidates facet number values one by one and order them by it.
/// ///
/// This function is fast when the amount of candidates to rank is small. /// This function is fast when the amount of candidates to rank is small.
fn iterative_facet_ordered_iter<'t>( fn iterative_facet_number_ordered_iter<'t>(
index: &'t Index, index: &'t Index,
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
field_id: FieldId, field_id: FieldId,
ascending: bool, is_ascending: bool,
candidates: RoaringBitmap, candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = RoaringBitmap> + 't> { ) -> Result<impl Iterator<Item = RoaringBitmap> + 't> {
let mut docids_values = Vec::with_capacity(candidates.len() as usize); let mut docids_values = Vec::with_capacity(candidates.len() as usize);
@ -199,14 +221,14 @@ fn iterative_facet_ordered_iter<'t>(
let left = (field_id, docid, f64::MIN); let left = (field_id, docid, f64::MIN);
let right = (field_id, docid, f64::MAX); let right = (field_id, docid, f64::MAX);
let mut iter = index.field_id_docid_facet_f64s.range(rtxn, &(left..=right))?; let mut iter = index.field_id_docid_facet_f64s.range(rtxn, &(left..=right))?;
let entry = if ascending { iter.next() } else { iter.last() }; let entry = if is_ascending { iter.next() } else { iter.last() };
if let Some(((_, _, value), ())) = entry.transpose()? { if let Some(((_, _, value), ())) = entry.transpose()? {
docids_values.push((docid, OrderedFloat(value))); docids_values.push((docid, OrderedFloat(value)));
} }
} }
docids_values.sort_unstable_by_key(|(_, v)| *v); docids_values.sort_unstable_by_key(|(_, v)| *v);
let iter = docids_values.into_iter(); let iter = docids_values.into_iter();
let iter = if ascending { let iter = if is_ascending {
Box::new(iter) as Box<dyn Iterator<Item = _>> Box::new(iter) as Box<dyn Iterator<Item = _>>
} else { } else {
Box::new(iter.rev()) Box::new(iter.rev())
@ -216,7 +238,49 @@ fn iterative_facet_ordered_iter<'t>(
// required to collect the result into an owned collection (a Vec). // required to collect the result into an owned collection (a Vec).
// https://github.com/rust-itertools/itertools/issues/499 // https://github.com/rust-itertools/itertools/issues/499
let vec: Vec<_> = iter let vec: Vec<_> = iter
.group_by(|(_, v)| v.clone()) .group_by(|(_, v)| *v)
.into_iter()
.map(|(_, ids)| ids.map(|(id, _)| id).collect())
.collect();
Ok(vec.into_iter())
}
/// Fetch the whole list of candidates facet string values one by one and order them by it.
///
/// This function is fast when the amount of candidates to rank is small.
fn iterative_facet_string_ordered_iter<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn,
field_id: FieldId,
is_ascending: bool,
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = RoaringBitmap> + 't> {
let mut docids_values = Vec::with_capacity(candidates.len() as usize);
for docid in candidates.iter() {
let left = (field_id, docid, "");
let right = (field_id, docid.saturating_add(1), "");
// FIXME Doing this means that it will never be possible to retrieve
// the document with id 2^32, not sure this is a real problem.
let mut iter = index.field_id_docid_facet_strings.range(rtxn, &(left..right))?;
let entry = if is_ascending { iter.next() } else { iter.last() };
if let Some(((_, _, value), _)) = entry.transpose()? {
docids_values.push((docid, value));
}
}
docids_values.sort_unstable_by_key(|(_, v)| *v);
let iter = docids_values.into_iter();
let iter = if is_ascending {
Box::new(iter) as Box<dyn Iterator<Item = _>>
} else {
Box::new(iter.rev())
};
// The itertools GroupBy iterator doesn't provide an owned version, we are therefore
// required to collect the result into an owned collection (a Vec).
// https://github.com/rust-itertools/itertools/issues/499
let vec: Vec<_> = iter
.group_by(|(_, v)| *v)
.into_iter() .into_iter()
.map(|(_, ids)| ids.map(|(id, _)| id).collect()) .map(|(_, ids)| ids.map(|(id, _)| id).collect())
.collect(); .collect();