MeiliSearch/milli/src/search/criteria/asc_desc.rs

233 lines
8.3 KiB
Rust
Raw Normal View History

use std::collections::HashMap;
2021-02-19 15:45:15 +01:00
use std::mem::take;
use anyhow::{bail, Context as _};
2021-02-19 15:45:15 +01:00
use itertools::Itertools;
use log::debug;
2021-02-19 15:45:15 +01:00
use ordered_float::OrderedFloat;
use roaring::RoaringBitmap;
use crate::facet::FacetType;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
2021-02-19 15:45:15 +01:00
use crate::search::facet::FacetIter;
use crate::search::query_tree::Operation;
use crate::{FieldsIdsMap, FieldId, Index};
use super::{Criterion, CriterionParameters, CriterionResult};
2021-02-19 15:45:15 +01:00
/// Number of candidates that decides which ordering algorithm `facet_ordered` uses.
///
/// At or below this count, each candidate's facet value is looked up individually and the
/// candidates are sorted in memory; above it, the facet database is traversed with a
/// `FacetIter` instead.
const CANDIDATES_THRESHOLD: u64 = 1000;
2021-02-19 15:45:15 +01:00
pub struct AscDesc<'t> {
index: &'t Index,
rtxn: &'t heed::RoTxn<'t>,
field_name: String,
2021-02-19 15:45:15 +01:00
field_id: FieldId,
facet_type: FacetType,
ascending: bool,
query_tree: Option<Operation>,
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
2021-02-25 16:47:34 +01:00
bucket_candidates: RoaringBitmap,
2021-02-19 15:45:15 +01:00
faceted_candidates: RoaringBitmap,
2021-03-23 15:25:46 +01:00
parent: Box<dyn Criterion + 't>,
2021-02-19 15:45:15 +01:00
}
impl<'t> AscDesc<'t> {
pub fn asc(
index: &'t Index,
rtxn: &'t heed::RoTxn,
parent: Box<dyn Criterion + 't>,
field_name: String,
) -> anyhow::Result<Self> {
Self::new(index, rtxn, parent, field_name, true)
2021-02-19 15:45:15 +01:00
}
pub fn desc(
index: &'t Index,
rtxn: &'t heed::RoTxn,
parent: Box<dyn Criterion + 't>,
field_name: String,
) -> anyhow::Result<Self> {
Self::new(index, rtxn, parent, field_name, false)
2021-02-19 15:45:15 +01:00
}
fn new(
index: &'t Index,
rtxn: &'t heed::RoTxn,
parent: Box<dyn Criterion + 't>,
field_name: String,
2021-02-19 15:45:15 +01:00
ascending: bool,
) -> anyhow::Result<Self> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let faceted_fields = index.faceted_fields(rtxn)?;
let (field_id, facet_type) =
field_id_facet_type(&fields_ids_map, &faceted_fields, &field_name)?;
2021-02-19 15:45:15 +01:00
Ok(AscDesc {
index,
rtxn,
field_name,
2021-02-19 15:45:15 +01:00
field_id,
facet_type,
ascending,
query_tree: None,
candidates: Box::new(std::iter::empty()),
2021-02-19 15:45:15 +01:00
faceted_candidates: index.faceted_documents_ids(rtxn, field_id)?,
2021-02-25 16:47:34 +01:00
bucket_candidates: RoaringBitmap::new(),
2021-03-23 15:25:46 +01:00
parent,
2021-02-19 15:45:15 +01:00
})
}
}
impl<'t> Criterion for AscDesc<'t> {
#[logging_timer::time("AscDesc::{}")]
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
2021-02-19 15:45:15 +01:00
loop {
debug!(
"Facet {}({}) iteration",
if self.ascending { "Asc" } else { "Desc" },
self.field_name
);
match self.candidates.next().transpose()? {
None => {
match self.parent.next(params)? {
2021-05-10 12:33:37 +02:00
Some(CriterionResult { query_tree, candidates, filtered_candidates, bucket_candidates }) => {
2021-03-23 15:25:46 +01:00
self.query_tree = query_tree;
2021-05-10 12:33:37 +02:00
let mut candidates = match (&self.query_tree, candidates) {
(_, Some(candidates)) => candidates & &self.faceted_candidates,
2021-03-23 15:25:46 +01:00
(Some(qt), None) => {
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
2021-05-10 12:33:37 +02:00
let candidates = resolve_query_tree(&context, qt, params.wdcache)?;
candidates & &self.faceted_candidates
},
2021-03-23 15:25:46 +01:00
(None, None) => take(&mut self.faceted_candidates),
};
2021-05-10 12:33:37 +02:00
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
2021-05-05 20:46:56 +02:00
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
2021-03-23 15:25:46 +01:00
if candidates.is_empty() {
continue;
}
self.candidates = facet_ordered(
self.index,
self.rtxn,
self.field_id,
self.facet_type,
self.ascending,
candidates,
)?;
},
2021-03-23 15:25:46 +01:00
None => return Ok(None),
}
2021-02-19 15:45:15 +01:00
},
Some(mut candidates) => {
candidates -= params.excluded_candidates;
2021-02-19 15:45:15 +01:00
return Ok(Some(CriterionResult {
query_tree: self.query_tree.clone(),
2021-03-09 12:04:52 +01:00
candidates: Some(candidates),
2021-05-10 12:33:37 +02:00
filtered_candidates: None,
2021-05-05 20:46:56 +02:00
bucket_candidates: Some(take(&mut self.bucket_candidates)),
2021-02-19 15:45:15 +01:00
}));
}
2021-02-19 15:45:15 +01:00
}
}
}
}
/// Looks up the id and the facet type of `field`.
///
/// # Errors
///
/// Returns an error when `field` has no id in `fields_ids_map` ("isn't registered"), or
/// when it is absent from `faceted_fields` ("isn't faceted"). Registration is checked first.
fn field_id_facet_type(
    fields_ids_map: &FieldsIdsMap,
    faceted_fields: &HashMap<String, FacetType>,
    field: &str,
) -> anyhow::Result<(FieldId, FacetType)> {
    let field_id = fields_ids_map
        .id(field)
        .with_context(|| format!("field {:?} isn't registered", field))?;

    faceted_fields
        .get(field)
        .map(|facet_type| (field_id, *facet_type))
        .with_context(|| format!("field {:?} isn't faceted", field))
}
/// Returns an iterator over groups of the given candidates in ascending or descending order.
///
/// It will either use an iterative or a recursive method on the whole facet database depending
/// on the number of candidates to rank.
fn facet_ordered<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn,
2021-02-19 15:45:15 +01:00
field_id: FieldId,
facet_type: FacetType,
ascending: bool,
candidates: RoaringBitmap,
) -> anyhow::Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
2021-02-19 15:45:15 +01:00
match facet_type {
FacetType::Number => {
if candidates.len() <= CANDIDATES_THRESHOLD {
let iter =
iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?;
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
2021-02-19 15:45:15 +01:00
} else {
let facet_fn = if ascending {
FacetIter::new_reducing
2021-02-19 15:45:15 +01:00
} else {
FacetIter::new_reverse_reducing
2021-02-19 15:45:15 +01:00
};
let iter = facet_fn(rtxn, index, field_id, candidates)?;
Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids))))
2021-02-19 15:45:15 +01:00
}
}
2021-02-19 15:45:15 +01:00
FacetType::String => bail!("criteria facet type must be a number"),
}
}
/// Fetches the facet value of every candidate one by one and orders the candidates by it.
///
/// For each candidate, the first stored value of the field is used when `ascending` is
/// true and the last one otherwise; candidates without any stored value are left out.
/// Candidates sharing the same value end up in the same returned bitmap.
///
/// This function is fast when the amount of candidates to rank is small.
fn iterative_facet_ordered_iter<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    ascending: bool,
    candidates: RoaringBitmap,
) -> anyhow::Result<impl Iterator<Item = RoaringBitmap> + 't> {
    let mut docids_values = Vec::with_capacity(candidates.len() as usize);
    for docid in candidates.iter() {
        // Every facet value stored for this (field, document) pair.
        let bounds = (field_id, docid, f64::MIN)..=(field_id, docid, f64::MAX);
        let mut values = index.field_id_docid_facet_f64s.range(rtxn, &bounds)?;
        let picked = if ascending { values.next() } else { values.last() };
        if let Some(((_, _, value), ())) = picked.transpose()? {
            docids_values.push((docid, OrderedFloat(value)));
        }
    }

    docids_values.sort_unstable_by_key(|(_, value)| *value);
    if !ascending {
        docids_values.reverse();
    }

    // The itertools GroupBy iterator doesn't provide an owned version, we are therefore
    // required to collect the result into an owned collection (a Vec).
    // https://github.com/rust-itertools/itertools/issues/499
    let grouped = docids_values.into_iter().group_by(|(_, value)| *value);
    let mut buckets: Vec<RoaringBitmap> = Vec::new();
    for (_, group) in &grouped {
        buckets.push(group.map(|(docid, _)| docid).collect());
    }

    Ok(buckets.into_iter())
}