mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 11:47:28 +01:00
add documentation
This commit is contained in:
parent
45c45e11dd
commit
2f73fa55ae
@ -343,6 +343,20 @@ impl Index {
|
||||
}
|
||||
}
|
||||
|
||||
/* Distinct attribute */
|
||||
|
||||
pub(crate) fn put_distinct_attribute(&self, wtxn: &mut RwTxn, distinct_attribute: &str) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY, distinct_attribute)
|
||||
}
|
||||
|
||||
pub fn distinct_attribute<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
|
||||
self.main.get::<_, Str, Str>(rtxn, DISTINCT_ATTRIBUTE_KEY)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_distinct_attribute(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY)
|
||||
}
|
||||
|
||||
/* criteria */
|
||||
|
||||
pub fn put_criteria(&self, wtxn: &mut RwTxn, criteria: &[Criterion]) -> heed::Result<()> {
|
||||
@ -462,17 +476,6 @@ impl Index {
|
||||
self.main.put::<_, Str, SerdeJson<DateTime<Utc>>>(wtxn, UPDATED_AT_KEY, &time)
|
||||
}
|
||||
|
||||
pub(crate) fn put_distinct_attribute(&self, wtxn: &mut RwTxn, distinct_attribute: &str) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY, distinct_attribute)
|
||||
}
|
||||
|
||||
pub fn distinct_attribute<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
|
||||
self.main.get::<_, Str, Str>(rtxn, DISTINCT_ATTRIBUTE_KEY)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_distinct_attribute(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -6,6 +6,12 @@ use crate::heed_codec::facet::*;
|
||||
use crate::{facet::FacetType, DocumentId, FieldId, Index};
|
||||
use super::{Distinct, DocIter};
|
||||
|
||||
/// A distinct implementer that is backed by facets. On each iteration, the facet values for the
|
||||
/// distinct attribute of the first document are retrieved. The document ids for these facet values
|
||||
/// are then retrieved and taken out of the the candidate and added to the excluded set. We take
|
||||
/// care to keep the document we are currently on, and remove it from the excluded list. The next
|
||||
/// iterations will never contain any occurence of a document with the same distinct value as a
|
||||
/// document from previous iterations.
|
||||
pub struct FacetDistinct<'a> {
|
||||
distinct: FieldId,
|
||||
index: &'a Index,
|
||||
@ -114,6 +120,9 @@ impl<'a> FacetDistinctIter<'a> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Performs the next iteration of the facet distinct. This is a convenience method that is
|
||||
/// called by the Iterator::next implementation that tranposes the result. It makes error
|
||||
/// handling easier.
|
||||
fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> {
|
||||
// The first step is to remove all the excluded documents from our candidates
|
||||
self.candidates.difference_with(&self.excluded);
|
||||
@ -127,8 +136,10 @@ impl<'a> FacetDistinctIter<'a> {
|
||||
FacetType::Float => self.distinct_float(id)?,
|
||||
};
|
||||
|
||||
// On every iteration, the first document is always a distinct one, since it
|
||||
// hasn't been discarded by the previous difference.
|
||||
// The first document of each iteration is kept, since the next call to
|
||||
// `difference_with` will filter out all the documents for that facet value. By
|
||||
// increasing the offset we make sure to get the first valid value for the next
|
||||
// distinct document to keep.
|
||||
self.iter_offset += 1;
|
||||
Ok(Some(id))
|
||||
}
|
||||
|
@ -6,6 +6,9 @@ use serde_json::Value;
|
||||
use super::{Distinct, DocIter};
|
||||
use crate::{DocumentId, FieldId, Index};
|
||||
|
||||
/// A distinct implementer that is backed by an `HashMap`. Each time a document is seen, the value
|
||||
/// for its distinct field is added to the map. If the map already contains an entry for this
|
||||
/// value, then the document is filtered out, and is added to the excluded set.
|
||||
pub struct MapDistinct<'a> {
|
||||
distinct: FieldId,
|
||||
map: HashMap<String, usize>,
|
||||
|
@ -9,11 +9,17 @@ pub use map_distinct::MapDistinct;
|
||||
pub use noop_distinct::NoopDistinct;
|
||||
use crate::DocumentId;
|
||||
|
||||
/// A trait implemented by document interators that are returned by calls to `Distinct::distinct`.
|
||||
/// It provides a way to get back the ownership to the excluded set.
|
||||
pub trait DocIter: Iterator<Item=anyhow::Result<DocumentId>> {
|
||||
/// Returns ownership on the internal RoaringBitmaps: (candidates, excluded)
|
||||
/// Returns ownership on the internal exluded set.
|
||||
fn into_excluded(self) -> RoaringBitmap;
|
||||
}
|
||||
|
||||
/// A trait that is implemented by structs that perform a distinct on `candidates`. Calling distinct
|
||||
/// must return an iterator containing only distinct documents, and add the discarded documents to
|
||||
/// the excluded set. The excluded set can later be retrieved by calling `DocIter::excluded` on the
|
||||
/// returned iterator.
|
||||
pub trait Distinct<'a> {
|
||||
type Iter: DocIter;
|
||||
|
||||
|
@ -1,12 +1,14 @@
|
||||
use roaring::RoaringBitmap;
|
||||
use roaring::{RoaringBitmap, bitmap::IntoIter};
|
||||
|
||||
use crate::DocumentId;
|
||||
use super::{DocIter, Distinct};
|
||||
|
||||
/// A distinct implementer that does not perform any distinct, and simply returns an iterator to
|
||||
/// the candidates.
|
||||
pub struct NoopDistinct;
|
||||
|
||||
pub struct NoopDistinctIter {
|
||||
candidates: roaring::bitmap::IntoIter,
|
||||
candidates: IntoIter,
|
||||
excluded: RoaringBitmap,
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user