mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-06-28 09:28:30 +02:00
Optimize facet sort
5 to 10x speedup
This commit is contained in:
parent
4534dc2cab
commit
340d9e6edc
@ -1558,19 +1558,32 @@ fn retrieve_documents<S: AsRef<str>>(
|
|||||||
})?
|
})?
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut facet_sort = None;
|
||||||
if let Some(sort) = sort_criteria {
|
if let Some(sort) = sort_criteria {
|
||||||
candidates = recursive_facet_sort(index, &rtxn, &sort, candidates)?;
|
facet_sort = Some(recursive_facet_sort(index, &rtxn, &sort, &candidates)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
let (it, number_of_documents) = {
|
let (it, number_of_documents) = if let Some(facet_sort) = &facet_sort {
|
||||||
|
let number_of_documents = candidates.len();
|
||||||
|
let iter = facet_sort.iter()?;
|
||||||
|
(
|
||||||
|
itertools::Either::Left(some_documents(
|
||||||
|
index,
|
||||||
|
&rtxn,
|
||||||
|
iter.map(|d| d.unwrap()).skip(offset).take(limit),
|
||||||
|
retrieve_vectors,
|
||||||
|
)?),
|
||||||
|
number_of_documents,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
let number_of_documents = candidates.len();
|
let number_of_documents = candidates.len();
|
||||||
(
|
(
|
||||||
some_documents(
|
itertools::Either::Right(some_documents(
|
||||||
index,
|
index,
|
||||||
&rtxn,
|
&rtxn,
|
||||||
candidates.into_iter().skip(offset).take(limit),
|
candidates.into_iter().skip(offset).take(limit),
|
||||||
retrieve_vectors,
|
retrieve_vectors,
|
||||||
)?,
|
)?),
|
||||||
number_of_documents,
|
number_of_documents,
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
@ -1,19 +1,46 @@
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use heed::Database;
|
use heed::Database;
|
||||||
use crate::{heed_codec::{facet::{FacetGroupKeyCodec, FacetGroupValueCodec}, BytesRefCodec}, search::{facet::{ascending_facet_sort, descending_facet_sort}, new::check_sort_criteria}, AscDesc, Member};
|
use crate::{heed_codec::{facet::{FacetGroupKeyCodec, FacetGroupValueCodec}, BytesRefCodec}, search::{facet::{ascending_facet_sort, descending_facet_sort}, new::check_sort_criteria}, AscDesc, DocumentId, Member};
|
||||||
|
|
||||||
fn recursive_facet_sort_inner<'t>(
|
/// Builder for a [`SortedDocumentsIterator`].
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
/// Most builders won't ever be built, because pagination will skip them.
|
||||||
|
pub struct SortedDocumentsIteratorBuilder<'ctx> {
|
||||||
|
rtxn: &'ctx heed::RoTxn<'ctx>,
|
||||||
number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||||
string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||||
fields: &[(u16, bool)],
|
fields: &'ctx [(u16, bool)],
|
||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
) -> heed::Result<RoaringBitmap> {
|
}
|
||||||
let (field_id, ascending) = match fields.first() {
|
|
||||||
Some(first) => *first,
|
impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> {
|
||||||
None => return Ok(candidates),
|
/// Performs the sort and builds a [`SortedDocumentsIterator`].
|
||||||
|
fn build(self) -> heed::Result<SortedDocumentsIterator<'ctx>> {
|
||||||
|
let SortedDocumentsIteratorBuilder {
|
||||||
|
rtxn,
|
||||||
|
number_db,
|
||||||
|
string_db,
|
||||||
|
fields,
|
||||||
|
candidates,
|
||||||
|
} = self;
|
||||||
|
let size = candidates.len() as usize;
|
||||||
|
|
||||||
|
// There is no point sorting a 1-element array
|
||||||
|
if size <= 1 {
|
||||||
|
return Ok(SortedDocumentsIterator::Leaf {
|
||||||
|
size,
|
||||||
|
values: Box::new(candidates.into_iter()),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// There is no variable to sort on
|
||||||
|
let Some((field_id, ascending)) = fields.first().copied() else {
|
||||||
|
return Ok(SortedDocumentsIterator::Leaf {
|
||||||
|
size,
|
||||||
|
values: Box::new(candidates.into_iter()),
|
||||||
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Perform the sort on the first field
|
||||||
let (number_iter, string_iter) = if ascending {
|
let (number_iter, string_iter) = if ascending {
|
||||||
let number_iter = ascending_facet_sort(
|
let number_iter = ascending_facet_sort(
|
||||||
rtxn,
|
rtxn,
|
||||||
@ -46,33 +73,184 @@ fn recursive_facet_sort_inner<'t>(
|
|||||||
(itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
|
(itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
|
||||||
};
|
};
|
||||||
|
|
||||||
let chained_iter = number_iter.chain(string_iter);
|
// Create builders for the next level of the tree
|
||||||
let mut result = RoaringBitmap::new();
|
let number_db2 = number_db;
|
||||||
for part in chained_iter {
|
let string_db2 = string_db;
|
||||||
let (inner_candidates, _) = part?;
|
let number_iter = number_iter.map(move |r| -> heed::Result<SortedDocumentsIteratorBuilder> {
|
||||||
if inner_candidates.len() <= 1 || fields.len() <= 1 {
|
let (docids, _bytes) = r?;
|
||||||
result |= inner_candidates;
|
Ok(SortedDocumentsIteratorBuilder {
|
||||||
} else {
|
|
||||||
let inner_candidates = recursive_facet_sort_inner(
|
|
||||||
rtxn,
|
rtxn,
|
||||||
number_db,
|
number_db,
|
||||||
string_db,
|
string_db,
|
||||||
&fields[1..],
|
fields: &fields[1..],
|
||||||
inner_candidates,
|
candidates: docids,
|
||||||
)?;
|
})
|
||||||
result |= inner_candidates;
|
});
|
||||||
}
|
let string_iter = string_iter.map(move |r| -> heed::Result<SortedDocumentsIteratorBuilder> {
|
||||||
}
|
let (docids, _bytes) = r?;
|
||||||
|
Ok(SortedDocumentsIteratorBuilder {
|
||||||
|
rtxn,
|
||||||
|
number_db: number_db2,
|
||||||
|
string_db: string_db2,
|
||||||
|
fields: &fields[1..],
|
||||||
|
candidates: docids,
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
Ok(result)
|
Ok(SortedDocumentsIterator::Branch {
|
||||||
|
current_child: None,
|
||||||
|
next_children_size: size,
|
||||||
|
next_children: Box::new(number_iter.chain(string_iter)),
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn recursive_facet_sort<'t>(
|
/// A [`SortedDocumentsIterator`] allows efficient access to a continuous range of sorted documents.
|
||||||
index: &crate::Index,
|
/// This is ideal in the context of paginated queries in which only a small number of documents are needed at a time.
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
/// Search operations will only be performed upon access.
|
||||||
|
pub enum SortedDocumentsIterator<'ctx> {
|
||||||
|
Leaf {
|
||||||
|
/// The exact number of documents remaining
|
||||||
|
size: usize,
|
||||||
|
values: Box<dyn Iterator<Item = DocumentId> + 'ctx>
|
||||||
|
},
|
||||||
|
Branch {
|
||||||
|
/// The current child, got from the children iterator
|
||||||
|
current_child: Option<Box<SortedDocumentsIterator<'ctx>>>,
|
||||||
|
/// The exact number of documents remaining, excluding documents in the current child
|
||||||
|
next_children_size: usize,
|
||||||
|
/// Iterators to become the current child once it is exhausted
|
||||||
|
next_children: Box<dyn Iterator<Item = heed::Result<SortedDocumentsIteratorBuilder<'ctx>>> + 'ctx>,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SortedDocumentsIterator<'_> {
|
||||||
|
/// Takes care of updating the current child if it is `None`, and also updates the size
|
||||||
|
fn update_current<'ctx>(current_child: &mut Option<Box<SortedDocumentsIterator<'ctx>>>, next_children_size: &mut usize, next_children: &mut Box<dyn Iterator<Item = heed::Result<SortedDocumentsIteratorBuilder<'ctx>>> + 'ctx>) -> heed::Result<()> {
|
||||||
|
if current_child.is_none() {
|
||||||
|
*current_child = match next_children.next() {
|
||||||
|
Some(Ok(builder)) => {
|
||||||
|
let next_child = Box::new(builder.build()?);
|
||||||
|
*next_children_size -= next_child.size_hint().0;
|
||||||
|
Some(next_child)
|
||||||
|
},
|
||||||
|
Some(Err(e)) => return Err(e),
|
||||||
|
None => return Ok(()),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iterator for SortedDocumentsIterator<'_> {
|
||||||
|
type Item = heed::Result<DocumentId>;
|
||||||
|
|
||||||
|
fn nth(&mut self, n: usize) -> Option<Self::Item> {
|
||||||
|
// If it's at the leaf level, just forward the call to the values iterator
|
||||||
|
let (current_child, next_children, next_children_size) = match self {
|
||||||
|
SortedDocumentsIterator::Leaf { values, size } => {
|
||||||
|
*size = size.saturating_sub(n);
|
||||||
|
return values.nth(n).map(Ok)
|
||||||
|
},
|
||||||
|
SortedDocumentsIterator::Branch { current_child, next_children, next_children_size } => (current_child, next_children, next_children_size),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Otherwise don't directly iterate over children, skip them if we know we will go further
|
||||||
|
let mut to_skip = n - 1;
|
||||||
|
while to_skip > 0 {
|
||||||
|
if let Err(e) = SortedDocumentsIterator::update_current(current_child, next_children_size, next_children) {
|
||||||
|
return Some(Err(e));
|
||||||
|
}
|
||||||
|
let Some(inner) = current_child else {
|
||||||
|
return None; // No more inner iterators, everything has been consumed.
|
||||||
|
};
|
||||||
|
|
||||||
|
if to_skip >= inner.size_hint().0 {
|
||||||
|
// The current child isn't large enough to contain the nth element.
|
||||||
|
// Skip it and continue with the next one.
|
||||||
|
to_skip -= inner.size_hint().0;
|
||||||
|
*current_child = None;
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
// The current iterator is large enough, so we can forward the call to it.
|
||||||
|
return inner.nth(to_skip + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.next()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
|
let size = match self {
|
||||||
|
SortedDocumentsIterator::Leaf { size, .. } => *size,
|
||||||
|
SortedDocumentsIterator::Branch { next_children_size, current_child: Some(current_child), .. } => current_child.size_hint().0 + next_children_size,
|
||||||
|
SortedDocumentsIterator::Branch { next_children_size, current_child: None, .. } => *next_children_size,
|
||||||
|
};
|
||||||
|
|
||||||
|
(size, Some(size))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
match self {
|
||||||
|
SortedDocumentsIterator::Leaf { values, size } => {
|
||||||
|
let result = values.next().map(Ok);
|
||||||
|
if result.is_some() {
|
||||||
|
*size -= 1;
|
||||||
|
}
|
||||||
|
result
|
||||||
|
},
|
||||||
|
SortedDocumentsIterator::Branch { current_child, next_children_size, next_children } => {
|
||||||
|
let mut result = None;
|
||||||
|
while result.is_none() {
|
||||||
|
// Ensure we have selected an iterator to work with
|
||||||
|
if let Err(e) = SortedDocumentsIterator::update_current(current_child, next_children_size, next_children) {
|
||||||
|
return Some(Err(e));
|
||||||
|
}
|
||||||
|
let Some(inner) = current_child else {
|
||||||
|
return None;
|
||||||
|
};
|
||||||
|
|
||||||
|
result = inner.next();
|
||||||
|
|
||||||
|
// If the current iterator is exhausted, we need to try the next one
|
||||||
|
if result.is_none() {
|
||||||
|
*current_child = None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A structure owning the data needed during the lifetime of a [`SortedDocumentsIterator`].
|
||||||
|
pub struct SortedDocuments<'ctx> {
|
||||||
|
rtxn: &'ctx heed::RoTxn<'ctx>,
|
||||||
|
fields: Vec<(u16, bool)>,
|
||||||
|
number_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||||
|
string_db: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||||
|
candidates: &'ctx RoaringBitmap,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl <'ctx> SortedDocuments<'ctx> {
|
||||||
|
pub fn iter(&'ctx self) -> heed::Result<SortedDocumentsIterator<'ctx>> {
|
||||||
|
let builder = SortedDocumentsIteratorBuilder {
|
||||||
|
rtxn: self.rtxn,
|
||||||
|
number_db: self.number_db,
|
||||||
|
string_db: self.string_db,
|
||||||
|
fields: &self.fields,
|
||||||
|
candidates: self.candidates.clone(),
|
||||||
|
};
|
||||||
|
builder.build()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn recursive_facet_sort<'ctx>(
|
||||||
|
index: &'ctx crate::Index,
|
||||||
|
rtxn: &'ctx heed::RoTxn<'ctx>,
|
||||||
sort: &[AscDesc],
|
sort: &[AscDesc],
|
||||||
candidates: RoaringBitmap,
|
candidates: &'ctx RoaringBitmap,
|
||||||
) -> crate::Result<RoaringBitmap> {
|
) -> crate::Result<SortedDocuments<'ctx>> {
|
||||||
check_sort_criteria(index, rtxn, Some(sort))?;
|
check_sort_criteria(index, rtxn, Some(sort))?;
|
||||||
|
|
||||||
let mut fields = Vec::new();
|
let mut fields = Vec::new();
|
||||||
@ -96,6 +274,11 @@ pub fn recursive_facet_sort<'t>(
|
|||||||
.facet_id_string_docids
|
.facet_id_string_docids
|
||||||
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
||||||
|
|
||||||
let candidates = recursive_facet_sort_inner(rtxn, number_db, string_db, &fields, candidates)?;
|
Ok(SortedDocuments {
|
||||||
Ok(candidates)
|
rtxn,
|
||||||
|
fields,
|
||||||
|
number_db,
|
||||||
|
string_db,
|
||||||
|
candidates,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user