Remove the MapDistinct struct as now distinct attributes are faceted

This commit is contained in:
Kerollmops 2021-06-01 12:32:03 +02:00
parent ff440c1d9d
commit 187c713de5
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
4 changed files with 5 additions and 159 deletions

View File

@ -189,8 +189,6 @@ impl<'a> Distinct<'_> for FacetDistinct<'a> {
#[cfg(test)]
mod test {
use std::collections::HashSet;
use super::super::test::{generate_index, validate_distinct_candidates};
use super::*;
@ -198,10 +196,7 @@ mod test {
($name:ident, $distinct:literal) => {
#[test]
fn $name() {
use std::iter::FromIterator;
let facets = HashSet::from_iter(Some(($distinct.to_string())));
let (index, fid, candidates) = generate_index($distinct, facets);
let (index, fid, candidates) = generate_index($distinct);
let txn = index.read_txn().unwrap();
let mut map_distinct = FacetDistinct::new(fid, &index, &txn);
let excluded = RoaringBitmap::new();

View File

@ -1,138 +0,0 @@
use std::collections::HashMap;
use roaring::RoaringBitmap;
use serde_json::Value;
use super::{Distinct, DocIter};
use crate::{DocumentId, FieldId, Index};
/// A distinct implementer that is backed by an `HashMap`.
///
/// Each time a document is seen, the value
/// for its distinct field is added to the map. If the map already contains an entry for this
/// value, then the document is filtered out, and is added to the excluded set.
pub struct MapDistinct<'a> {
distinct: FieldId,
map: HashMap<String, usize>,
index: &'a Index,
txn: &'a heed::RoTxn<'a>,
}
impl<'a> MapDistinct<'a> {
pub fn new(distinct: FieldId, index: &'a Index, txn: &'a heed::RoTxn<'a>) -> Self {
Self {
distinct,
map: HashMap::new(),
index,
txn,
}
}
}
pub struct MapDistinctIter<'a, 'b> {
distinct: FieldId,
map: &'b mut HashMap<String, usize>,
index: &'a Index,
txn: &'a heed::RoTxn<'a>,
candidates: roaring::bitmap::IntoIter,
excluded: RoaringBitmap,
}
impl<'a, 'b> MapDistinctIter<'a, 'b> {
/// Performs the next iteration of the mafacetp distinct. This is a convenience method that is
/// called by the Iterator::next implementation that transposes the result. It makes error
/// handling easier.
fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> {
let map = &mut self.map;
let mut filter = |value: Value| {
let entry = map.entry(value.to_string()).or_insert(0);
*entry += 1;
*entry <= 1
};
while let Some(id) = self.candidates.next() {
let document = self.index.documents(&self.txn, Some(id))?[0].1;
let value = document
.get(self.distinct)
.map(serde_json::from_slice::<Value>)
.transpose()?;
let accept = match value {
Some(Value::Array(values)) => {
let mut accept = true;
for value in values {
accept &= filter(value);
}
accept
}
Some(Value::Null) | Some(Value::Object(_)) | None => true,
Some(value) => filter(value),
};
if accept {
return Ok(Some(id));
} else {
self.excluded.insert(id);
}
}
Ok(None)
}
}
impl Iterator for MapDistinctIter<'_, '_> {
type Item = anyhow::Result<DocumentId>;
fn next(&mut self) -> Option<Self::Item> {
self.next_inner().transpose()
}
}
impl DocIter for MapDistinctIter<'_, '_> {
fn into_excluded(self) -> RoaringBitmap {
self.excluded
}
}
impl<'a, 'b> Distinct<'b> for MapDistinct<'a> {
type Iter = MapDistinctIter<'a, 'b>;
fn distinct(&'b mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter {
MapDistinctIter {
distinct: self.distinct,
map: &mut self.map,
index: &self.index,
txn: &self.txn,
candidates: candidates.into_iter(),
excluded,
}
}
}
#[cfg(test)]
mod test {
use std::collections::HashSet;
use super::*;
use super::super::test::{generate_index, validate_distinct_candidates};
macro_rules! test_map_distinct {
($name:ident, $distinct:literal) => {
#[test]
fn $name() {
let (index, fid, candidates) = generate_index($distinct, HashSet::new());
let txn = index.read_txn().unwrap();
let mut map_distinct = MapDistinct::new(fid, &index, &txn);
let excluded = RoaringBitmap::new();
let mut iter = map_distinct.distinct(candidates.clone(), excluded);
let count = validate_distinct_candidates(iter.by_ref(), fid, &index);
let excluded = iter.into_excluded();
assert_eq!(count as u64 + excluded.len(), candidates.len());
}
};
}
test_map_distinct!(test_string, "txt");
test_map_distinct!(test_strings, "txts");
test_map_distinct!(test_int, "cat-int");
test_map_distinct!(test_ints, "cat-ints");
}

View File

@ -1,12 +1,10 @@
mod facet_distinct;
mod map_distinct;
mod noop_distinct;
use roaring::RoaringBitmap;
use crate::DocumentId;
pub use facet_distinct::FacetDistinct;
pub use map_distinct::MapDistinct;
pub use noop_distinct::NoopDistinct;
/// A trait implemented by document interators that are returned by calls to `Distinct::distinct`.
@ -74,7 +72,7 @@ mod test {
/// Returns a temporary index populated with random test documents, the FieldId for the
/// distinct attribute, and the RoaringBitmap with the document ids.
pub(crate) fn generate_index(distinct: &str, facets: HashSet<String>) -> (TempIndex, FieldId, RoaringBitmap) {
pub(crate) fn generate_index(distinct: &str) -> (TempIndex, FieldId, RoaringBitmap) {
let index = TempIndex::new();
let mut txn = index.write_txn().unwrap();
@ -82,9 +80,6 @@ mod test {
let builder = UpdateBuilder::new(0);
let mut update = builder.settings(&mut txn, &index);
update.set_distinct_attribute(distinct.to_string());
if !facets.is_empty() {
update.set_filterable_fields(facets)
}
update.execute(|_, _| ()).unwrap();
// add documents to the index

View File

@ -12,7 +12,7 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
use distinct::{Distinct, DocIter, FacetDistinct, MapDistinct, NoopDistinct};
use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct};
use crate::search::criteria::r#final::{Final, FinalResult};
use crate::{Index, DocumentId};
@ -141,14 +141,8 @@ impl<'a> Search<'a> {
Some(name) => {
let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
let id = field_ids_map.id(name).expect("distinct not present in field map");
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
if filterable_fields.contains(name) {
let distinct = FacetDistinct::new(id, self.index, self.rtxn);
self.perform_sort(distinct, matching_words, criteria)
} else {
let distinct = MapDistinct::new(id, self.index, self.rtxn);
self.perform_sort(distinct, matching_words, criteria)
}
let distinct = FacetDistinct::new(id, self.index, self.rtxn);
self.perform_sort(distinct, matching_words, criteria)
}
}
}