mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 05:14:27 +01:00
Rework the Asc/Desc criteria to be facet iterator based
This commit is contained in:
parent
a58d2b6137
commit
3c76b3548d
@ -2,8 +2,10 @@ use std::collections::HashMap;
|
|||||||
use std::mem::take;
|
use std::mem::take;
|
||||||
|
|
||||||
use anyhow::{bail, Context as _};
|
use anyhow::{bail, Context as _};
|
||||||
|
use heed::{BytesDecode, BytesEncode};
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
|
use num_traits::Bounded;
|
||||||
use ordered_float::OrderedFloat;
|
use ordered_float::OrderedFloat;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
@ -24,7 +26,7 @@ pub struct AscDesc<'t> {
|
|||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
ascending: bool,
|
ascending: bool,
|
||||||
query_tree: Option<Operation>,
|
query_tree: Option<Operation>,
|
||||||
candidates: RoaringBitmap,
|
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
|
||||||
bucket_candidates: RoaringBitmap,
|
bucket_candidates: RoaringBitmap,
|
||||||
faceted_candidates: RoaringBitmap,
|
faceted_candidates: RoaringBitmap,
|
||||||
parent: Option<Box<dyn Criterion + 't>>,
|
parent: Option<Box<dyn Criterion + 't>>,
|
||||||
@ -107,7 +109,7 @@ impl<'t> AscDesc<'t> {
|
|||||||
facet_type,
|
facet_type,
|
||||||
ascending,
|
ascending,
|
||||||
query_tree,
|
query_tree,
|
||||||
candidates,
|
candidates: facet_ordered(index, rtxn, field_id, facet_type, ascending, candidates)?,
|
||||||
faceted_candidates,
|
faceted_candidates,
|
||||||
bucket_candidates: RoaringBitmap::new(),
|
bucket_candidates: RoaringBitmap::new(),
|
||||||
parent: None,
|
parent: None,
|
||||||
@ -134,7 +136,7 @@ impl<'t> AscDesc<'t> {
|
|||||||
facet_type,
|
facet_type,
|
||||||
ascending,
|
ascending,
|
||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates: RoaringBitmap::new(),
|
candidates: Box::new(std::iter::empty()),
|
||||||
faceted_candidates: index.faceted_documents_ids(rtxn, field_id)?,
|
faceted_candidates: index.faceted_documents_ids(rtxn, field_id)?,
|
||||||
bucket_candidates: RoaringBitmap::new(),
|
bucket_candidates: RoaringBitmap::new(),
|
||||||
parent: Some(parent),
|
parent: Some(parent),
|
||||||
@ -145,23 +147,28 @@ impl<'t> AscDesc<'t> {
|
|||||||
impl<'t> Criterion for AscDesc<'t> {
|
impl<'t> Criterion for AscDesc<'t> {
|
||||||
fn next(&mut self) -> anyhow::Result<Option<CriterionResult>> {
|
fn next(&mut self) -> anyhow::Result<Option<CriterionResult>> {
|
||||||
loop {
|
loop {
|
||||||
debug!("Facet {}({}) iteration ({:?})",
|
debug!("Facet {}({}) iteration",
|
||||||
if self.ascending { "Asc" } else { "Desc" }, self.field_name, self.candidates,
|
if self.ascending { "Asc" } else { "Desc" }, self.field_name
|
||||||
);
|
);
|
||||||
|
|
||||||
match &mut self.candidates {
|
match self.candidates.next().transpose()? {
|
||||||
candidates if candidates.is_empty() => {
|
None => {
|
||||||
let query_tree = self.query_tree.take();
|
let query_tree = self.query_tree.take();
|
||||||
let candidates = take(&mut self.candidates);
|
|
||||||
let bucket_candidates = take(&mut self.bucket_candidates);
|
let bucket_candidates = take(&mut self.bucket_candidates);
|
||||||
|
|
||||||
match self.parent.as_mut() {
|
match self.parent.as_mut() {
|
||||||
Some(parent) => {
|
Some(parent) => {
|
||||||
match parent.next()? {
|
match parent.next()? {
|
||||||
Some(CriterionResult { query_tree, mut candidates, bucket_candidates }) => {
|
Some(CriterionResult { query_tree, mut candidates, bucket_candidates }) => {
|
||||||
self.query_tree = query_tree;
|
self.query_tree = query_tree;
|
||||||
candidates.intersect_with(&self.faceted_candidates);
|
candidates.intersect_with(&self.faceted_candidates);
|
||||||
self.candidates = candidates;
|
self.candidates = facet_ordered(
|
||||||
|
self.index,
|
||||||
|
self.rtxn,
|
||||||
|
self.field_id,
|
||||||
|
self.facet_type,
|
||||||
|
self.ascending,
|
||||||
|
candidates,
|
||||||
|
)?;
|
||||||
self.bucket_candidates = bucket_candidates;
|
self.bucket_candidates = bucket_candidates;
|
||||||
},
|
},
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
@ -172,28 +179,21 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
return Ok(Some(CriterionResult { query_tree, candidates, bucket_candidates }));
|
return Ok(Some(CriterionResult {
|
||||||
|
query_tree,
|
||||||
|
candidates: RoaringBitmap::new(),
|
||||||
|
bucket_candidates,
|
||||||
|
}));
|
||||||
},
|
},
|
||||||
candidates => {
|
Some(candidates) => {
|
||||||
let bucket_candidates = match self.parent {
|
let bucket_candidates = match self.parent {
|
||||||
Some(_) => take(&mut self.bucket_candidates),
|
Some(_) => take(&mut self.bucket_candidates),
|
||||||
None => candidates.clone(),
|
None => candidates.clone(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let found_candidates = facet_ordered(
|
|
||||||
self.index,
|
|
||||||
self.rtxn,
|
|
||||||
self.field_id,
|
|
||||||
self.facet_type,
|
|
||||||
self.ascending,
|
|
||||||
candidates.clone(),
|
|
||||||
)?;
|
|
||||||
|
|
||||||
candidates.difference_with(&found_candidates);
|
|
||||||
|
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: self.query_tree.clone(),
|
query_tree: self.query_tree.clone(),
|
||||||
candidates: found_candidates,
|
candidates,
|
||||||
bucket_candidates,
|
bucket_candidates,
|
||||||
}));
|
}));
|
||||||
},
|
},
|
||||||
@ -217,86 +217,98 @@ fn field_id_facet_type(
|
|||||||
Ok((id, *facet_type))
|
Ok((id, *facet_type))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn facet_ordered(
|
/// Returns an iterator over groups of the given candidates in ascending or descending order.
|
||||||
index: &Index,
|
///
|
||||||
rtxn: &heed::RoTxn,
|
/// It will either use an iterative or a recursive method on the whole facet database depending
|
||||||
|
/// on the number of candidates to rank.
|
||||||
|
fn facet_ordered<'t>(
|
||||||
|
index: &'t Index,
|
||||||
|
rtxn: &'t heed::RoTxn,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
ascending: bool,
|
ascending: bool,
|
||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> anyhow::Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>>
|
||||||
{
|
{
|
||||||
match facet_type {
|
match facet_type {
|
||||||
FacetType::Float => {
|
FacetType::Float => {
|
||||||
if candidates.len() <= 1000 {
|
if candidates.len() <= 1000 {
|
||||||
let db = index.field_id_docid_facet_values.remap_key_type::<FieldDocIdFacetF64Codec>();
|
let iter = iterative_facet_ordered_iter::<FieldDocIdFacetF64Codec, f64, OrderedFloat<f64>>(
|
||||||
let mut docids_values = Vec::with_capacity(candidates.len() as usize);
|
index, rtxn, field_id, ascending, candidates,
|
||||||
for docid in candidates.iter() {
|
)?;
|
||||||
let left = (field_id, docid, f64::MIN);
|
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
|
||||||
let right = (field_id, docid, f64::MAX);
|
|
||||||
let mut iter = db.range(rtxn, &(left..=right))?;
|
|
||||||
let entry = if ascending { iter.next() } else { iter.last() };
|
|
||||||
if let Some(((_, _, value), ())) = entry.transpose()? {
|
|
||||||
docids_values.push((docid, OrderedFloat(value)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
docids_values.sort_unstable_by_key(|(_, value)| *value);
|
|
||||||
let iter = docids_values.into_iter();
|
|
||||||
let iter = if ascending {
|
|
||||||
Box::new(iter) as Box<dyn Iterator<Item = _>>
|
|
||||||
} else {
|
|
||||||
Box::new(iter.rev())
|
|
||||||
};
|
|
||||||
match iter.group_by(|(_, v)| *v).into_iter().next() {
|
|
||||||
Some((_, ids)) => Ok(ids.map(|(id, _)| id).into_iter().collect()),
|
|
||||||
None => Ok(RoaringBitmap::new())
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
let facet_fn = if ascending {
|
let facet_fn = if ascending {
|
||||||
FacetIter::<f64, FacetLevelValueF64Codec>::new_reducing
|
FacetIter::<f64, FacetLevelValueF64Codec>::new_reducing
|
||||||
} else {
|
} else {
|
||||||
FacetIter::<f64, FacetLevelValueF64Codec>::new_reverse_reducing
|
FacetIter::<f64, FacetLevelValueF64Codec>::new_reverse_reducing
|
||||||
};
|
};
|
||||||
|
let iter = facet_fn(rtxn, index, field_id, candidates)?;
|
||||||
let mut iter = facet_fn(rtxn, index, field_id, candidates)?;
|
Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids))))
|
||||||
Ok(iter.next().transpose()?.map(|(_, docids)| docids).unwrap_or_default())
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
FacetType::Integer => {
|
FacetType::Integer => {
|
||||||
if candidates.len() <= 1000 {
|
if candidates.len() <= 1000 {
|
||||||
let db = index.field_id_docid_facet_values.remap_key_type::<FieldDocIdFacetI64Codec>();
|
let iter = iterative_facet_ordered_iter::<FieldDocIdFacetI64Codec, i64, i64>(
|
||||||
let mut docids_values = Vec::with_capacity(candidates.len() as usize);
|
index, rtxn, field_id, ascending, candidates,
|
||||||
for docid in candidates.iter() {
|
)?;
|
||||||
let left = (field_id, docid, i64::MIN);
|
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
|
||||||
let right = (field_id, docid, i64::MAX);
|
|
||||||
let mut iter = db.range(rtxn, &(left..=right))?;
|
|
||||||
let entry = if ascending { iter.next() } else { iter.last() };
|
|
||||||
if let Some(((_, _, value), ())) = entry.transpose()? {
|
|
||||||
docids_values.push((docid, value));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
docids_values.sort_unstable_by_key(|(_, value)| *value);
|
|
||||||
let iter = docids_values.into_iter();
|
|
||||||
let iter = if ascending {
|
|
||||||
Box::new(iter) as Box<dyn Iterator<Item = _>>
|
|
||||||
} else {
|
|
||||||
Box::new(iter.rev())
|
|
||||||
};
|
|
||||||
match iter.group_by(|(_, v)| *v).into_iter().next() {
|
|
||||||
Some((_, ids)) => Ok(ids.map(|(id, _)| id).into_iter().collect()),
|
|
||||||
None => Ok(RoaringBitmap::new())
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
let facet_fn = if ascending {
|
let facet_fn = if ascending {
|
||||||
FacetIter::<i64, FacetLevelValueI64Codec>::new_reducing
|
FacetIter::<i64, FacetLevelValueI64Codec>::new_reducing
|
||||||
} else {
|
} else {
|
||||||
FacetIter::<i64, FacetLevelValueI64Codec>::new_reverse_reducing
|
FacetIter::<i64, FacetLevelValueI64Codec>::new_reverse_reducing
|
||||||
};
|
};
|
||||||
|
let iter = facet_fn(rtxn, index, field_id, candidates)?;
|
||||||
let mut iter = facet_fn(rtxn, index, field_id, candidates)?;
|
Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids))))
|
||||||
Ok(iter.next().transpose()?.map(|(_, docids)| docids).unwrap_or_default())
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
FacetType::String => bail!("criteria facet type must be a number"),
|
FacetType::String => bail!("criteria facet type must be a number"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fetch the facet value of each candidate one by one and order the candidates by that value.
|
||||||
|
///
|
||||||
|
/// This function is fast when the amount of candidates to rank is small.
|
||||||
|
fn iterative_facet_ordered_iter<'t, KC, T, U>(
|
||||||
|
index: &'t Index,
|
||||||
|
rtxn: &'t heed::RoTxn,
|
||||||
|
field_id: FieldId,
|
||||||
|
ascending: bool,
|
||||||
|
candidates: RoaringBitmap,
|
||||||
|
) -> anyhow::Result<impl Iterator<Item = RoaringBitmap> + 't>
|
||||||
|
where
|
||||||
|
KC: BytesDecode<'t, DItem = (FieldId, u32, T)>,
|
||||||
|
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u32, T)>,
|
||||||
|
T: Bounded,
|
||||||
|
U: From<T> + Ord + Clone + 't,
|
||||||
|
{
|
||||||
|
let db = index.field_id_docid_facet_values.remap_key_type::<KC>();
|
||||||
|
let mut docids_values = Vec::with_capacity(candidates.len() as usize);
|
||||||
|
for docid in candidates.iter() {
|
||||||
|
let left = (field_id, docid, T::min_value());
|
||||||
|
let right = (field_id, docid, T::max_value());
|
||||||
|
let mut iter = db.range(rtxn, &(left..=right))?;
|
||||||
|
let entry = if ascending { iter.next() } else { iter.last() };
|
||||||
|
if let Some(((_, _, value), ())) = entry.transpose()? {
|
||||||
|
docids_values.push((docid, U::from(value)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
docids_values.sort_unstable_by_key(|(_, v)| v.clone());
|
||||||
|
let iter = docids_values.into_iter();
|
||||||
|
let iter = if ascending {
|
||||||
|
Box::new(iter) as Box<dyn Iterator<Item = _>>
|
||||||
|
} else {
|
||||||
|
Box::new(iter.rev())
|
||||||
|
};
|
||||||
|
|
||||||
|
// The itertools GroupBy iterator doesn't provide an owned version, we are therefore
|
||||||
|
// required to collect the result into an owned collection (a Vec).
|
||||||
|
// https://github.com/rust-itertools/itertools/issues/499
|
||||||
|
let vec: Vec<_> = iter.group_by(|(_, v)| v.clone())
|
||||||
|
.into_iter()
|
||||||
|
.map(|(_, ids)| ids.map(|(id, _)| id).collect())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Ok(vec.into_iter())
|
||||||
|
}
|
||||||
|
@ -1,12 +1,11 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
use anyhow::{bail, Context as _};
|
use anyhow::bail;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::facet::FacetType;
|
|
||||||
use crate::search::word_derivations;
|
use crate::search::word_derivations;
|
||||||
use crate::{Index, FieldId};
|
use crate::Index;
|
||||||
|
|
||||||
use super::query_tree::{Operation, Query, QueryKind};
|
use super::query_tree::{Operation, Query, QueryKind};
|
||||||
use self::typo::Typo;
|
use self::typo::Typo;
|
||||||
|
Loading…
Reference in New Issue
Block a user