mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-03-27 01:50:30 +01:00

Don't apply clippy for tests for now Fix clippy warnings of filter-parser package parent 8352febd646ec4bcf56a44161e5c4dce0e55111f author unvalley <38400669+unvalley@users.noreply.github.com> 1666325847 +0900 committer unvalley <kirohi.code@gmail.com> 1666791316 +0900 Update .github/workflows/rust.yml Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com> Allow clippy lint too_many_arguments Allow clippy lint needless_collect Allow clippy lint too_many_arguments and type_complexity Fix for clippy warnings comparison_chains Fix for clippy warnings vec_init_then_push Allow clippy lint should_implement_trait Allow clippy lint drop_non_drop Fix lifetime clippy warnings in filter-parser Execute cargo fmt Fix clippy remaining warnings Fix clippy remaining warnings again and allow lint on each place
219 lines
7.0 KiB
Rust
219 lines
7.0 KiB
Rust
use std::mem::size_of;
|
|
|
|
use concat_arrays::concat_arrays;
|
|
use heed::types::{ByteSlice, Str, Unit};
|
|
use roaring::RoaringBitmap;
|
|
|
|
use super::{Distinct, DocIter};
|
|
use crate::error::InternalError;
|
|
use crate::heed_codec::facet::{FacetGroupKey, *};
|
|
use crate::index::db_name;
|
|
use crate::{DocumentId, FieldId, Index, Result};
|
|
|
|
const FID_SIZE: usize = size_of::<FieldId>();
|
|
const DOCID_SIZE: usize = size_of::<DocumentId>();
|
|
|
|
/// A distinct implementer that is backed by facets.
|
|
///
|
|
/// On each iteration, the facet values for the
|
|
/// distinct attribute of the first document are retrieved. The document ids for these facet values
|
|
/// are then retrieved and taken out of the the candidate and added to the excluded set. We take
|
|
/// care to keep the document we are currently on, and remove it from the excluded list. The next
|
|
/// iterations will never contain any occurence of a document with the same distinct value as a
|
|
/// document from previous iterations.
|
|
#[derive(Clone)]
|
|
pub struct FacetDistinct<'a> {
|
|
distinct: FieldId,
|
|
index: &'a Index,
|
|
txn: &'a heed::RoTxn<'a>,
|
|
}
|
|
|
|
impl<'a> FacetDistinct<'a> {
|
|
pub fn new(distinct: FieldId, index: &'a Index, txn: &'a heed::RoTxn<'a>) -> Self {
|
|
Self { distinct, index, txn }
|
|
}
|
|
}
|
|
|
|
pub struct FacetDistinctIter<'a> {
|
|
candidates: RoaringBitmap,
|
|
distinct: FieldId,
|
|
excluded: RoaringBitmap,
|
|
index: &'a Index,
|
|
iter_offset: usize,
|
|
txn: &'a heed::RoTxn<'a>,
|
|
}
|
|
|
|
impl<'a> FacetDistinctIter<'a> {
|
|
fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> {
|
|
self.index
|
|
.facet_id_string_docids
|
|
.get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key })
|
|
.map(|opt| opt.map(|v| v.bitmap))
|
|
}
|
|
|
|
fn facet_number_docids(&self, key: f64) -> heed::Result<Option<RoaringBitmap>> {
|
|
// get facet docids on level 0
|
|
self.index
|
|
.facet_id_f64_docids
|
|
.get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key })
|
|
.map(|opt| opt.map(|v| v.bitmap))
|
|
}
|
|
|
|
fn distinct_string(&mut self, id: DocumentId) -> Result<()> {
|
|
let iter = facet_string_values(id, self.distinct, self.index, self.txn)?;
|
|
|
|
for item in iter {
|
|
let ((_, _, value), _) = item?;
|
|
let facet_docids =
|
|
self.facet_string_docids(value)?.ok_or(InternalError::DatabaseMissingEntry {
|
|
db_name: db_name::FACET_ID_STRING_DOCIDS,
|
|
key: None,
|
|
})?;
|
|
self.excluded |= facet_docids;
|
|
}
|
|
|
|
self.excluded.remove(id);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn distinct_number(&mut self, id: DocumentId) -> Result<()> {
|
|
let iter = facet_number_values(id, self.distinct, self.index, self.txn)?;
|
|
|
|
for item in iter {
|
|
let ((_, _, value), _) = item?;
|
|
let facet_docids =
|
|
self.facet_number_docids(value)?.ok_or(InternalError::DatabaseMissingEntry {
|
|
db_name: db_name::FACET_ID_F64_DOCIDS,
|
|
key: None,
|
|
})?;
|
|
self.excluded |= facet_docids;
|
|
}
|
|
|
|
self.excluded.remove(id);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Performs the next iteration of the facet distinct. This is a convenience method that is
|
|
/// called by the Iterator::next implementation that transposes the result. It makes error
|
|
/// handling easier.
|
|
fn next_inner(&mut self) -> Result<Option<DocumentId>> {
|
|
// The first step is to remove all the excluded documents from our candidates
|
|
self.candidates -= &self.excluded;
|
|
|
|
let mut candidates_iter = self.candidates.iter().skip(self.iter_offset);
|
|
match candidates_iter.next() {
|
|
Some(id) => {
|
|
// We distinct the document id on its facet strings and facet numbers.
|
|
self.distinct_string(id)?;
|
|
self.distinct_number(id)?;
|
|
|
|
// The first document of each iteration is kept, since the next call to
|
|
// `difference_with` will filter out all the documents for that facet value. By
|
|
// increasing the offset we make sure to get the first valid value for the next
|
|
// distinct document to keep.
|
|
self.iter_offset += 1;
|
|
|
|
Ok(Some(id))
|
|
}
|
|
// no more candidate at this offset, return.
|
|
None => Ok(None),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::drop_non_drop)]
|
|
fn facet_values_prefix_key(distinct: FieldId, id: DocumentId) -> [u8; FID_SIZE + DOCID_SIZE] {
|
|
concat_arrays!(distinct.to_be_bytes(), id.to_be_bytes())
|
|
}
|
|
|
|
fn facet_number_values<'a>(
|
|
id: DocumentId,
|
|
distinct: FieldId,
|
|
index: &Index,
|
|
txn: &'a heed::RoTxn,
|
|
) -> Result<heed::RoPrefix<'a, FieldDocIdFacetF64Codec, Unit>> {
|
|
let key = facet_values_prefix_key(distinct, id);
|
|
|
|
let iter = index
|
|
.field_id_docid_facet_f64s
|
|
.remap_key_type::<ByteSlice>()
|
|
.prefix_iter(txn, &key)?
|
|
.remap_key_type::<FieldDocIdFacetF64Codec>();
|
|
|
|
Ok(iter)
|
|
}
|
|
|
|
fn facet_string_values<'a>(
|
|
id: DocumentId,
|
|
distinct: FieldId,
|
|
index: &Index,
|
|
txn: &'a heed::RoTxn,
|
|
) -> Result<heed::RoPrefix<'a, FieldDocIdFacetStringCodec, Str>> {
|
|
let key = facet_values_prefix_key(distinct, id);
|
|
|
|
let iter = index
|
|
.field_id_docid_facet_strings
|
|
.remap_key_type::<ByteSlice>()
|
|
.prefix_iter(txn, &key)?
|
|
.remap_types::<FieldDocIdFacetStringCodec, Str>();
|
|
|
|
Ok(iter)
|
|
}
|
|
|
|
impl Iterator for FacetDistinctIter<'_> {
|
|
type Item = Result<DocumentId>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
self.next_inner().transpose()
|
|
}
|
|
}
|
|
|
|
impl DocIter for FacetDistinctIter<'_> {
|
|
fn into_excluded(self) -> RoaringBitmap {
|
|
self.excluded
|
|
}
|
|
}
|
|
|
|
impl<'a> Distinct for FacetDistinct<'a> {
|
|
type Iter = FacetDistinctIter<'a>;
|
|
|
|
fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter {
|
|
FacetDistinctIter {
|
|
candidates,
|
|
distinct: self.distinct,
|
|
excluded,
|
|
index: self.index,
|
|
iter_offset: 0,
|
|
txn: self.txn,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::super::test::{generate_index, validate_distinct_candidates};
    use super::*;

    /// Generates a test named `$name` that runs the facet distinct on the attribute
    /// `$distinct` and checks that the number of validated documents plus the number of
    /// excluded documents equals the number of initial candidates.
    macro_rules! test_facet_distinct {
        ($name:ident, $distinct:literal) => {
            #[test]
            fn $name() {
                let (index, fid, candidates) = generate_index($distinct);
                let rtxn = index.read_txn().unwrap();
                let expected_total = candidates.len();

                let mut facet_distinct = FacetDistinct::new(fid, &index, &rtxn);
                let mut iter = facet_distinct.distinct(candidates, RoaringBitmap::new());
                let kept = validate_distinct_candidates(iter.by_ref(), fid, &index);
                let excluded = iter.into_excluded();

                assert_eq!(kept as u64 + excluded.len(), expected_total);
            }
        };
    }

    test_facet_distinct!(test_string, "txt");
    test_facet_distinct!(test_strings, "txts");
    test_facet_distinct!(test_number, "cat-int");
}
|