mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 21:20:24 +01:00
Remove the MapDistinct struct as now distinct attributes are faceted
This commit is contained in:
parent
ff440c1d9d
commit
187c713de5
@ -189,8 +189,6 @@ impl<'a> Distinct<'_> for FacetDistinct<'a> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::collections::HashSet;
|
||||
|
||||
use super::super::test::{generate_index, validate_distinct_candidates};
|
||||
use super::*;
|
||||
|
||||
@ -198,10 +196,7 @@ mod test {
|
||||
($name:ident, $distinct:literal) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
use std::iter::FromIterator;
|
||||
|
||||
let facets = HashSet::from_iter(Some(($distinct.to_string())));
|
||||
let (index, fid, candidates) = generate_index($distinct, facets);
|
||||
let (index, fid, candidates) = generate_index($distinct);
|
||||
let txn = index.read_txn().unwrap();
|
||||
let mut map_distinct = FacetDistinct::new(fid, &index, &txn);
|
||||
let excluded = RoaringBitmap::new();
|
||||
|
@ -1,138 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::{Distinct, DocIter};
|
||||
use crate::{DocumentId, FieldId, Index};
|
||||
|
||||
/// A distinct implementer that is backed by a `HashMap`.
|
||||
///
|
||||
/// Each time a document is seen, the value
|
||||
/// for its distinct field is added to the map. If the map already contains an entry for this
|
||||
/// value, then the document is filtered out, and is added to the excluded set.
|
||||
pub struct MapDistinct<'a> {
|
||||
distinct: FieldId,
|
||||
map: HashMap<String, usize>,
|
||||
index: &'a Index,
|
||||
txn: &'a heed::RoTxn<'a>,
|
||||
}
|
||||
|
||||
impl<'a> MapDistinct<'a> {
|
||||
pub fn new(distinct: FieldId, index: &'a Index, txn: &'a heed::RoTxn<'a>) -> Self {
|
||||
Self {
|
||||
distinct,
|
||||
map: HashMap::new(),
|
||||
index,
|
||||
txn,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MapDistinctIter<'a, 'b> {
|
||||
distinct: FieldId,
|
||||
map: &'b mut HashMap<String, usize>,
|
||||
index: &'a Index,
|
||||
txn: &'a heed::RoTxn<'a>,
|
||||
candidates: roaring::bitmap::IntoIter,
|
||||
excluded: RoaringBitmap,
|
||||
}
|
||||
|
||||
impl<'a, 'b> MapDistinctIter<'a, 'b> {
|
||||
/// Performs the next iteration of the map distinct. This is a convenience method that is
|
||||
/// called by the Iterator::next implementation that transposes the result. It makes error
|
||||
/// handling easier.
|
||||
fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> {
|
||||
let map = &mut self.map;
|
||||
let mut filter = |value: Value| {
|
||||
let entry = map.entry(value.to_string()).or_insert(0);
|
||||
*entry += 1;
|
||||
*entry <= 1
|
||||
};
|
||||
|
||||
while let Some(id) = self.candidates.next() {
|
||||
let document = self.index.documents(&self.txn, Some(id))?[0].1;
|
||||
let value = document
|
||||
.get(self.distinct)
|
||||
.map(serde_json::from_slice::<Value>)
|
||||
.transpose()?;
|
||||
|
||||
let accept = match value {
|
||||
Some(Value::Array(values)) => {
|
||||
let mut accept = true;
|
||||
for value in values {
|
||||
accept &= filter(value);
|
||||
}
|
||||
accept
|
||||
}
|
||||
Some(Value::Null) | Some(Value::Object(_)) | None => true,
|
||||
Some(value) => filter(value),
|
||||
};
|
||||
|
||||
if accept {
|
||||
return Ok(Some(id));
|
||||
} else {
|
||||
self.excluded.insert(id);
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for MapDistinctIter<'_, '_> {
|
||||
type Item = anyhow::Result<DocumentId>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.next_inner().transpose()
|
||||
}
|
||||
}
|
||||
|
||||
impl DocIter for MapDistinctIter<'_, '_> {
|
||||
fn into_excluded(self) -> RoaringBitmap {
|
||||
self.excluded
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'b> Distinct<'b> for MapDistinct<'a> {
|
||||
type Iter = MapDistinctIter<'a, 'b>;
|
||||
|
||||
fn distinct(&'b mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter {
|
||||
MapDistinctIter {
|
||||
distinct: self.distinct,
|
||||
map: &mut self.map,
|
||||
index: &self.index,
|
||||
txn: &self.txn,
|
||||
candidates: candidates.into_iter(),
|
||||
excluded,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::collections::HashSet;
|
||||
|
||||
use super::*;
|
||||
use super::super::test::{generate_index, validate_distinct_candidates};
|
||||
|
||||
macro_rules! test_map_distinct {
|
||||
($name:ident, $distinct:literal) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let (index, fid, candidates) = generate_index($distinct, HashSet::new());
|
||||
let txn = index.read_txn().unwrap();
|
||||
let mut map_distinct = MapDistinct::new(fid, &index, &txn);
|
||||
let excluded = RoaringBitmap::new();
|
||||
let mut iter = map_distinct.distinct(candidates.clone(), excluded);
|
||||
let count = validate_distinct_candidates(iter.by_ref(), fid, &index);
|
||||
let excluded = iter.into_excluded();
|
||||
assert_eq!(count as u64 + excluded.len(), candidates.len());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
test_map_distinct!(test_string, "txt");
|
||||
test_map_distinct!(test_strings, "txts");
|
||||
test_map_distinct!(test_int, "cat-int");
|
||||
test_map_distinct!(test_ints, "cat-ints");
|
||||
}
|
@ -1,12 +1,10 @@
|
||||
mod facet_distinct;
|
||||
mod map_distinct;
|
||||
mod noop_distinct;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::DocumentId;
|
||||
pub use facet_distinct::FacetDistinct;
|
||||
pub use map_distinct::MapDistinct;
|
||||
pub use noop_distinct::NoopDistinct;
|
||||
|
||||
/// A trait implemented by document iterators that are returned by calls to `Distinct::distinct`.
|
||||
@ -74,7 +72,7 @@ mod test {
|
||||
|
||||
/// Returns a temporary index populated with random test documents, the FieldId for the
|
||||
/// distinct attribute, and the RoaringBitmap with the document ids.
|
||||
pub(crate) fn generate_index(distinct: &str, facets: HashSet<String>) -> (TempIndex, FieldId, RoaringBitmap) {
|
||||
pub(crate) fn generate_index(distinct: &str) -> (TempIndex, FieldId, RoaringBitmap) {
|
||||
let index = TempIndex::new();
|
||||
let mut txn = index.write_txn().unwrap();
|
||||
|
||||
@ -82,9 +80,6 @@ mod test {
|
||||
let builder = UpdateBuilder::new(0);
|
||||
let mut update = builder.settings(&mut txn, &index);
|
||||
update.set_distinct_attribute(distinct.to_string());
|
||||
if !facets.is_empty() {
|
||||
update.set_filterable_fields(facets)
|
||||
}
|
||||
update.execute(|_, _| ()).unwrap();
|
||||
|
||||
// add documents to the index
|
||||
|
@ -12,7 +12,7 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
||||
use once_cell::sync::Lazy;
|
||||
use roaring::bitmap::RoaringBitmap;
|
||||
|
||||
use distinct::{Distinct, DocIter, FacetDistinct, MapDistinct, NoopDistinct};
|
||||
use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct};
|
||||
use crate::search::criteria::r#final::{Final, FinalResult};
|
||||
use crate::{Index, DocumentId};
|
||||
|
||||
@ -141,14 +141,8 @@ impl<'a> Search<'a> {
|
||||
Some(name) => {
|
||||
let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||
let id = field_ids_map.id(name).expect("distinct not present in field map");
|
||||
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
|
||||
if filterable_fields.contains(name) {
|
||||
let distinct = FacetDistinct::new(id, self.index, self.rtxn);
|
||||
self.perform_sort(distinct, matching_words, criteria)
|
||||
} else {
|
||||
let distinct = MapDistinct::new(id, self.index, self.rtxn);
|
||||
self.perform_sort(distinct, matching_words, criteria)
|
||||
}
|
||||
let distinct = FacetDistinct::new(id, self.index, self.rtxn);
|
||||
self.perform_sort(distinct, matching_words, criteria)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user