mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
add tests
This commit is contained in:
parent
75464a1baa
commit
9c4660d3d6
@ -478,13 +478,44 @@ impl Index {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
pub(crate) mod tests {
|
||||||
|
use std::ops::Deref;
|
||||||
|
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use maplit::hashmap;
|
use maplit::hashmap;
|
||||||
|
use tempfile::TempDir;
|
||||||
|
|
||||||
use crate::Index;
|
use crate::Index;
|
||||||
use crate::update::{IndexDocuments, UpdateFormat};
|
use crate::update::{IndexDocuments, UpdateFormat};
|
||||||
|
|
||||||
|
pub(crate) struct TempIndex {
|
||||||
|
inner: Index,
|
||||||
|
_tempdir: TempDir,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Deref for TempIndex {
|
||||||
|
type Target = Index;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.inner
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TempIndex {
|
||||||
|
/// Creates a temporary index, with a default `4096 * 100` size. This should be enough for
|
||||||
|
/// most tests.
|
||||||
|
pub fn new() -> Self {
|
||||||
|
let mut options = EnvOpenOptions::new();
|
||||||
|
options.map_size(100 * 4096);
|
||||||
|
let _tempdir = TempDir::new_in(".").unwrap();
|
||||||
|
let inner = Index::new(options, _tempdir.path()).unwrap();
|
||||||
|
Self {
|
||||||
|
inner,
|
||||||
|
_tempdir
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn initial_fields_distribution() {
|
fn initial_fields_distribution() {
|
||||||
let path = tempfile::tempdir().unwrap();
|
let path = tempfile::tempdir().unwrap();
|
||||||
|
@ -6,7 +6,9 @@ use crate::heed_codec::facet::*;
|
|||||||
use crate::{facet::FacetType, DocumentId, FieldId, Index};
|
use crate::{facet::FacetType, DocumentId, FieldId, Index};
|
||||||
use super::{Distinct, DocIter};
|
use super::{Distinct, DocIter};
|
||||||
|
|
||||||
/// A distinct implementer that is backed by facets. On each iteration, the facet values for the
|
/// A distinct implementer that is backed by facets.
|
||||||
|
///
|
||||||
|
/// On each iteration, the facet values for the
|
||||||
/// distinct attribute of the first document are retrieved. The document ids for these facet values
|
/// distinct attribute of the first document are retrieved. The document ids for these facet values
|
||||||
/// are then retrieved and taken out of the candidate and added to the excluded set. We take
|
/// are then retrieved and taken out of the candidate and added to the excluded set. We take
|
||||||
/// care to keep the document we are currently on, and remove it from the excluded list. The next
|
/// care to keep the document we are currently on, and remove it from the excluded list. The next
|
||||||
@ -121,7 +123,7 @@ impl<'a> FacetDistinctIter<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Performs the next iteration of the facet distinct. This is a convenience method that is
|
/// Performs the next iteration of the facet distinct. This is a convenience method that is
|
||||||
/// called by the Iterator::next implementation that tranposes the result. It makes error
|
/// called by the Iterator::next implementation that transposes the result. It makes error
|
||||||
/// handling easier.
|
/// handling easier.
|
||||||
fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> {
|
fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> {
|
||||||
// The first step is to remove all the excluded documents from our candidates
|
// The first step is to remove all the excluded documents from our candidates
|
||||||
@ -201,3 +203,36 @@ impl<'a> Distinct<'_> for FacetDistinct<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
use super::super::test::{generate_index, validate_distinct_candidates};
|
||||||
|
use crate::facet::FacetType;
|
||||||
|
|
||||||
|
macro_rules! test_facet_distinct {
|
||||||
|
($name:ident, $distinct:literal, $facet_type:expr) => {
|
||||||
|
#[test]
|
||||||
|
fn $name() {
|
||||||
|
use std::iter::FromIterator;
|
||||||
|
|
||||||
|
let facets = HashMap::from_iter(Some(($distinct.to_string(), $facet_type.to_string())));
|
||||||
|
let (index, fid, candidates) = generate_index($distinct, facets);
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
let mut map_distinct = FacetDistinct::new(fid, &index, &txn, $facet_type);
|
||||||
|
let excluded = RoaringBitmap::new();
|
||||||
|
let mut iter = map_distinct.distinct(candidates.clone(), excluded);
|
||||||
|
let count = validate_distinct_candidates(iter.by_ref(), fid, &index);
|
||||||
|
let excluded = iter.into_excluded();
|
||||||
|
assert_eq!(count as u64 + excluded.len(), candidates.len());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
test_facet_distinct!(test_string, "txt", FacetType::String);
|
||||||
|
test_facet_distinct!(test_strings, "txts", FacetType::String);
|
||||||
|
test_facet_distinct!(test_int, "cat-int", FacetType::Integer);
|
||||||
|
test_facet_distinct!(test_ints, "cat-ints", FacetType::Integer);
|
||||||
|
}
|
||||||
|
@ -6,7 +6,9 @@ use serde_json::Value;
|
|||||||
use super::{Distinct, DocIter};
|
use super::{Distinct, DocIter};
|
||||||
use crate::{DocumentId, FieldId, Index};
|
use crate::{DocumentId, FieldId, Index};
|
||||||
|
|
||||||
/// A distinct implementer that is backed by a `HashMap`. Each time a document is seen, the value
|
/// A distinct implementer that is backed by a `HashMap`.
|
||||||
|
///
|
||||||
|
/// Each time a document is seen, the value
|
||||||
/// for its distinct field is added to the map. If the map already contains an entry for this
|
/// for its distinct field is added to the map. If the map already contains an entry for this
|
||||||
/// value, then the document is filtered out, and is added to the excluded set.
|
/// value, then the document is filtered out, and is added to the excluded set.
|
||||||
pub struct MapDistinct<'a> {
|
pub struct MapDistinct<'a> {
|
||||||
@ -38,7 +40,7 @@ pub struct MapDistinctIter<'a, 'b> {
|
|||||||
|
|
||||||
impl<'a, 'b> MapDistinctIter<'a, 'b> {
|
impl<'a, 'b> MapDistinctIter<'a, 'b> {
|
||||||
/// Performs the next iteration of the map distinct. This is a convenience method that is
|
/// Performs the next iteration of the map distinct. This is a convenience method that is
|
||||||
/// called by the Iterator::next implementation that tranposes the result. It makes error
|
/// called by the Iterator::next implementation that transposes the result. It makes error
|
||||||
/// handling easier.
|
/// handling easier.
|
||||||
fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> {
|
fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> {
|
||||||
let map = &mut self.map;
|
let map = &mut self.map;
|
||||||
@ -105,3 +107,32 @@ impl<'a, 'b> Distinct<'b> for MapDistinct<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
use super::super::test::{generate_index, validate_distinct_candidates};
|
||||||
|
|
||||||
|
macro_rules! test_map_distinct {
|
||||||
|
($name:ident, $distinct:literal) => {
|
||||||
|
#[test]
|
||||||
|
fn $name() {
|
||||||
|
let (index, fid, candidates) = generate_index($distinct, HashMap::new());
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
let mut map_distinct = MapDistinct::new(fid, &index, &txn);
|
||||||
|
let excluded = RoaringBitmap::new();
|
||||||
|
let mut iter = map_distinct.distinct(candidates.clone(), excluded);
|
||||||
|
let count = validate_distinct_candidates(iter.by_ref(), fid, &index);
|
||||||
|
let excluded = iter.into_excluded();
|
||||||
|
assert_eq!(count as u64 + excluded.len(), candidates.len());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
test_map_distinct!(test_string, "txt");
|
||||||
|
test_map_distinct!(test_strings, "txts");
|
||||||
|
test_map_distinct!(test_int, "cat-int");
|
||||||
|
test_map_distinct!(test_ints, "cat-ints");
|
||||||
|
}
|
||||||
|
@ -4,14 +4,14 @@ mod noop_distinct;
|
|||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::DocumentId;
|
||||||
pub use facet_distinct::FacetDistinct;
|
pub use facet_distinct::FacetDistinct;
|
||||||
pub use map_distinct::MapDistinct;
|
pub use map_distinct::MapDistinct;
|
||||||
pub use noop_distinct::NoopDistinct;
|
pub use noop_distinct::NoopDistinct;
|
||||||
use crate::DocumentId;
|
|
||||||
|
|
||||||
/// A trait implemented by document iterators that are returned by calls to `Distinct::distinct`.
|
/// A trait implemented by document iterators that are returned by calls to `Distinct::distinct`.
|
||||||
/// It provides a way to get back the ownership to the excluded set.
|
/// It provides a way to get back the ownership to the excluded set.
|
||||||
pub trait DocIter: Iterator<Item=anyhow::Result<DocumentId>> {
|
pub trait DocIter: Iterator<Item = anyhow::Result<DocumentId>> {
|
||||||
/// Returns ownership on the internal excluded set.
|
/// Returns ownership on the internal excluded set.
|
||||||
fn into_excluded(self) -> RoaringBitmap;
|
fn into_excluded(self) -> RoaringBitmap;
|
||||||
}
|
}
|
||||||
@ -25,3 +25,120 @@ pub trait Distinct<'a> {
|
|||||||
|
|
||||||
fn distinct(&'a mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter;
|
fn distinct(&'a mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use std::collections::{HashMap, HashSet};
|
||||||
|
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
use rand::{seq::SliceRandom, Rng};
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
use serde_json::{json, Value};
|
||||||
|
|
||||||
|
use crate::index::{Index, tests::TempIndex};
|
||||||
|
use crate::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
|
||||||
|
use crate::{BEU32, FieldId, DocumentId};
|
||||||
|
|
||||||
|
static JSON: Lazy<Value> = Lazy::new(generate_json);
|
||||||
|
|
||||||
|
fn generate_json() -> Value {
|
||||||
|
let mut rng = rand::thread_rng();
|
||||||
|
let num_docs = rng.gen_range(10..30);
|
||||||
|
|
||||||
|
let mut documents = Vec::new();
|
||||||
|
|
||||||
|
let txts = ["toto", "titi", "tata"];
|
||||||
|
let cats = (1..10).map(|i| i.to_string()).collect::<Vec<_>>();
|
||||||
|
let cat_ints = (1..10).collect::<Vec<_>>();
|
||||||
|
|
||||||
|
for i in 0..num_docs {
|
||||||
|
let txt = txts.choose(&mut rng).unwrap();
|
||||||
|
let mut sample_txts = cats.clone();
|
||||||
|
sample_txts.shuffle(&mut rng);
|
||||||
|
|
||||||
|
let mut sample_ints = cat_ints.clone();
|
||||||
|
sample_ints.shuffle(&mut rng);
|
||||||
|
|
||||||
|
let doc = json!({
|
||||||
|
"id": i,
|
||||||
|
"txt": txt,
|
||||||
|
"cat-int": rng.gen_range(0..3),
|
||||||
|
"txts": sample_txts[..(rng.gen_range(0..3))],
|
||||||
|
"cat-ints": sample_ints[..(rng.gen_range(0..3))],
|
||||||
|
});
|
||||||
|
documents.push(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
Value::Array(documents)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a temporary index populated with random test documents, the FieldId for the
|
||||||
|
/// distinct attribute, and the RoaringBitmap with the document ids.
|
||||||
|
pub(crate) fn generate_index(distinct: &str, facets: HashMap<String, String>) -> (TempIndex, FieldId, RoaringBitmap) {
|
||||||
|
let index = TempIndex::new();
|
||||||
|
let mut txn = index.write_txn().unwrap();
|
||||||
|
|
||||||
|
// set distinct and faceted attributes for the index.
|
||||||
|
let builder = UpdateBuilder::new(0);
|
||||||
|
let mut update = builder.settings(&mut txn, &index);
|
||||||
|
update.set_distinct_attribute(distinct.to_string());
|
||||||
|
if !facets.is_empty() {
|
||||||
|
update.set_faceted_fields(facets)
|
||||||
|
}
|
||||||
|
update.execute(|_, _| ()).unwrap();
|
||||||
|
|
||||||
|
// add documents to the index
|
||||||
|
let builder = UpdateBuilder::new(1);
|
||||||
|
let mut addition = builder.index_documents(&mut txn, &index);
|
||||||
|
|
||||||
|
addition.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
||||||
|
addition.update_format(UpdateFormat::Json);
|
||||||
|
|
||||||
|
addition
|
||||||
|
.execute(JSON.to_string().as_bytes(), |_, _| ())
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let fields_map = index.fields_ids_map(&txn).unwrap();
|
||||||
|
let fid = fields_map.id(&distinct).unwrap();
|
||||||
|
|
||||||
|
let map = (0..JSON.as_array().unwrap().len() as u32).collect();
|
||||||
|
|
||||||
|
txn.commit().unwrap();
|
||||||
|
|
||||||
|
(index, fid, map)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Checks that all the candidates are distinct, and returns the candidates number.
|
||||||
|
pub(crate) fn validate_distinct_candidates(
|
||||||
|
candidates: impl Iterator<Item=anyhow::Result<DocumentId>>,
|
||||||
|
distinct: FieldId,
|
||||||
|
index: &Index,
|
||||||
|
) -> usize {
|
||||||
|
fn test(seen: &mut HashSet<String>, value: &Value) {
|
||||||
|
match value {
|
||||||
|
Value::Null | Value::Object(_) | Value::Bool(_) => (),
|
||||||
|
Value::Number(_) | Value::String(_) => {
|
||||||
|
let s = value.to_string();
|
||||||
|
assert!(seen.insert(s));
|
||||||
|
}
|
||||||
|
Value::Array(values) => {values.into_iter().for_each(|value| test(seen, value))}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut seen = HashSet::<String>::new();
|
||||||
|
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
let mut count = 0;
|
||||||
|
for candidate in candidates {
|
||||||
|
count += 1;
|
||||||
|
let candidate = candidate.unwrap();
|
||||||
|
let id = BEU32::new(candidate);
|
||||||
|
let document = index.documents.get(&txn, &id).unwrap().unwrap();
|
||||||
|
let value = document.get(distinct).unwrap();
|
||||||
|
let value = serde_json::from_slice(value).unwrap();
|
||||||
|
test(&mut seen, &value);
|
||||||
|
}
|
||||||
|
count
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -3,8 +3,8 @@ use roaring::{RoaringBitmap, bitmap::IntoIter};
|
|||||||
use crate::DocumentId;
|
use crate::DocumentId;
|
||||||
use super::{DocIter, Distinct};
|
use super::{DocIter, Distinct};
|
||||||
|
|
||||||
/// A distinct implementer that does not perform any distinct, and simply returns an iterator to
|
/// A distinct implementer that does not perform any distinct,
|
||||||
/// the candidates.
|
/// and simply returns an iterator to the candidates.
|
||||||
pub struct NoopDistinct;
|
pub struct NoopDistinct;
|
||||||
|
|
||||||
pub struct NoopDistinctIter {
|
pub struct NoopDistinctIter {
|
||||||
@ -36,3 +36,22 @@ impl Distinct<'_> for NoopDistinct {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_noop() {
|
||||||
|
let candidates = (1..10).collect();
|
||||||
|
let excluded = RoaringBitmap::new();
|
||||||
|
let mut iter = NoopDistinct.distinct(candidates, excluded);
|
||||||
|
assert_eq!(
|
||||||
|
iter.by_ref().map(Result::unwrap).collect::<Vec<_>>(),
|
||||||
|
(1..10).collect::<Vec<_>>()
|
||||||
|
);
|
||||||
|
|
||||||
|
let excluded = iter.into_excluded();
|
||||||
|
assert!(excluded.is_empty());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user