mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-05-25 09:03:59 +02:00
112 lines
3.5 KiB
Rust
112 lines
3.5 KiB
Rust
use std::io::Write;
|
|
|
|
use big_s::S;
|
|
use bumpalo::Bump;
|
|
use heed::EnvOpenOptions;
|
|
use maplit::{btreemap, hashset};
|
|
|
|
use crate::progress::Progress;
|
|
use crate::update::new::indexer;
|
|
use crate::update::{IndexerConfig, Settings};
|
|
use crate::vector::EmbeddingConfigs;
|
|
use crate::{db_snap, Criterion, FilterableAttributesRule, Index};
|
|
/// Contents of the `test_set.ndjson` test asset, embedded into the binary at
/// compile time. `setup_search_index_with_criteria` writes these bytes to a
/// temporary file and indexes them as the document payload.
pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson");
|
|
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
|
|
|
pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
|
let path = tempfile::tempdir().unwrap();
|
|
let options = EnvOpenOptions::new();
|
|
let mut options = options.read_txn_without_tls();
|
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
|
let index = Index::new(options, &path, true).unwrap();
|
|
|
|
let mut wtxn = index.write_txn().unwrap();
|
|
let config = IndexerConfig::default();
|
|
|
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
|
|
|
builder.set_criteria(criteria.to_vec());
|
|
builder.set_filterable_fields(vec![
|
|
FilterableAttributesRule::Field(S("tag")),
|
|
FilterableAttributesRule::Field(S("asc_desc_rank")),
|
|
FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)),
|
|
FilterableAttributesRule::Field(S("opt1")),
|
|
FilterableAttributesRule::Field(S("opt1.opt2")),
|
|
FilterableAttributesRule::Field(S("tag_in")),
|
|
]);
|
|
builder.set_sortable_fields(hashset! {
|
|
S("tag"),
|
|
S("asc_desc_rank"),
|
|
});
|
|
builder.set_synonyms(btreemap! {
|
|
S("hello") => vec![S("good morning")],
|
|
S("world") => vec![S("earth")],
|
|
S("america") => vec![S("the united states")],
|
|
});
|
|
builder.set_searchable_fields(vec![S("title"), S("description")]);
|
|
builder.execute(|_| (), || false).unwrap();
|
|
wtxn.commit().unwrap();
|
|
|
|
// index documents
|
|
let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
|
|
let rtxn = index.read_txn().unwrap();
|
|
let mut wtxn = index.write_txn().unwrap();
|
|
|
|
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
|
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
|
|
|
let embedders = EmbeddingConfigs::default();
|
|
let mut indexer = indexer::DocumentOperation::new();
|
|
|
|
let mut file = tempfile::tempfile().unwrap();
|
|
file.write_all(CONTENT.as_bytes()).unwrap();
|
|
file.sync_all().unwrap();
|
|
let payload = unsafe { memmap2::Mmap::map(&file).unwrap() };
|
|
|
|
// index documents
|
|
indexer.replace_documents(&payload).unwrap();
|
|
|
|
let indexer_alloc = Bump::new();
|
|
let (document_changes, operation_stats, primary_key) = indexer
|
|
.into_changes(
|
|
&indexer_alloc,
|
|
&index,
|
|
&rtxn,
|
|
None,
|
|
&mut new_fields_ids_map,
|
|
&|| false,
|
|
Progress::default(),
|
|
)
|
|
.unwrap();
|
|
|
|
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
|
|
panic!("{error}");
|
|
}
|
|
|
|
indexer::index(
|
|
&mut wtxn,
|
|
&index,
|
|
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
|
|
config.grenad_parameters(),
|
|
&db_fields_ids_map,
|
|
new_fields_ids_map,
|
|
primary_key,
|
|
&document_changes,
|
|
embedders,
|
|
&|| false,
|
|
&Progress::default(),
|
|
)
|
|
.unwrap();
|
|
|
|
wtxn.commit().unwrap();
|
|
drop(rtxn);
|
|
|
|
index
|
|
}
|
|
|
|
/// Builds the test index ranked only by `Criterion::Attribute` and checks the
/// `word_position_docids` database against the recorded inline snapshot value.
#[test]
fn snapshot_integration_dataset() {
    let criteria = [Criterion::Attribute];
    let index = setup_search_index_with_criteria(&criteria);
    db_snap!(index, word_position_docids, @"3c9347a767bceef3beb31465f1e5f3ae");
}
|