MeiliSearch/crates/milli/tests/search/facet_distribution.rs

93 lines
2.9 KiB
Rust
Raw Permalink Normal View History

use big_s::S;
2024-11-18 17:39:55 +01:00
use bumpalo::Bump;
use heed::EnvOpenOptions;
use maplit::hashset;
use milli::documents::mmap_from_objects;
2024-12-10 16:30:48 +01:00
use milli::progress::Progress;
2024-11-18 17:39:55 +01:00
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
2024-11-18 17:39:55 +01:00
use milli::vector::EmbeddingConfigs;
use milli::{FacetDistribution, Index, Object, OrderBy};
use serde_json::{from_value, json};
/// Checks the facet distribution of a facet that has no values next to one that does.
///
/// `genres` and `tags` are both declared filterable. Two documents are indexed:
/// the first carries an empty `genres` array, the second has no `genres` field
/// at all, and each document has a different `tags` value. The distribution is
/// therefore expected to be empty for `genres` and to hold two entries for `tags`.
#[test]
fn test_facet_distribution_with_no_facet_values() {
    // Create a fresh index inside a temporary directory.
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path).unwrap();

    // Declare the two facets under test as filterable fields.
    let mut wtxn = index.write_txn().unwrap();
    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut wtxn, &index, &config);
    builder.set_filterable_fields(hashset! {
        S("genres"),
        S("tags"),
    });
    builder.execute(|_| (), || false).unwrap();
    wtxn.commit().unwrap();

    // Index the documents with a 10 MiB indexer memory cap.
    let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
    let rtxn = index.read_txn().unwrap();
    let mut wtxn = index.write_txn().unwrap();

    // Keep the stored map untouched and let the indexer extend a copy of it.
    let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
    let mut new_fields_ids_map = db_fields_ids_map.clone();

    let embedders = EmbeddingConfigs::default();
    let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);

    // `doc1` has an explicitly empty `genres`; `doc2` omits the field entirely.
    let doc1: Object = from_value(
        json!({ "id": 123, "title": "What a week, hu...", "genres": [], "tags": ["blue"] }),
    )
    .unwrap();
    let doc2: Object =
        from_value(json!({ "id": 345, "title": "I am the pig!", "tags": ["red"] })).unwrap();
    let documents = mmap_from_objects(vec![doc1, doc2]);

    indexer.add_documents(&documents).unwrap();

    let indexer_alloc = Bump::new();
    // NOTE(review): the `&|| false` closures here and below presumably act as
    // "must stop processing" callbacks that never request a stop — confirm.
    let (document_changes, _operation_stats, primary_key) = indexer
        .into_changes(
            &indexer_alloc,
            &index,
            &rtxn,
            None,
            &mut new_fields_ids_map,
            &|| false,
            Progress::default(),
        )
        .unwrap();

    indexer::index(
        &mut wtxn,
        &index,
        &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
        config.grenad_parameters(),
        &db_fields_ids_map,
        new_fields_ids_map,
        primary_key,
        &document_changes,
        embedders,
        &|| false,
        &Progress::default(),
    )
    .unwrap();
    wtxn.commit().unwrap();

    // Open a new read transaction so the committed documents are visible.
    let rtxn = index.read_txn().unwrap();

    // `genres` never received a value, so its distribution must be empty.
    let mut distrib = FacetDistribution::new(&rtxn, &index);
    distrib.facets(vec![("genres", OrderBy::default())]);
    let result = distrib.execute().unwrap();
    assert_eq!(result["genres"].len(), 0, "`genres` has no facet values");

    // `tags` received "blue" and "red": two distinct facet values.
    let mut distrib = FacetDistribution::new(&rtxn, &index);
    distrib.facets(vec![("tags", OrderBy::default())]);
    let result = distrib.execute().unwrap();
    assert_eq!(result["tags"].len(), 2, "`tags` has two distinct facet values");
}