use std::collections::HashSet;
use std::ops::Deref;

use big_s::S;
use bumpalo::Bump;
use heed::{EnvOpenOptions, RwTxn};
use maplit::btreemap;
use memmap2::Mmap;
use tempfile::TempDir;

use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::error::{Error, InternalError};
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
use crate::progress::Progress;
use crate::update::new::indexer;
use crate::update::settings::InnerIndexSettings;
use crate::update::{
    self, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting, Settings,
};
use crate::vector::settings::{EmbedderSource, EmbeddingSettings};
use crate::vector::EmbeddingConfigs;
use crate::{
    db_snap, obkv_to_json, Filter, FilterableAttributesRule, Index, Search, SearchResult,
    ThreadPoolNoAbortBuilder,
};

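/// A test helper that owns a real [`Index`] stored in a temporary directory.
///
/// The `TempDir` is kept alive for as long as the struct exists, so the on-disk
/// environment is removed automatically when the index is dropped. The
/// configuration fields are public so individual tests can tweak how documents
/// are indexed.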
pub(crate) struct TempIndex {
    pub inner: Index,
    pub indexer_config: IndexerConfig,
    pub index_documents_config: IndexDocumentsConfig,
    _tempdir: TempDir,
}

impl Deref for TempIndex {
    type Target = Index;

    fn deref(&self) -> &Self::Target {
        &self.inner
    }
}

impl TempIndex {
    /// Creates a temporary index
    pub fn new_with_map_size(size: usize) -> Self {
        let options = EnvOpenOptions::new();
        let mut options = options.read_txn_without_tls();
        options.map_size(size);
        let _tempdir = TempDir::new_in(".").unwrap();
        let inner = Index::new(options, _tempdir.path(), true).unwrap();
        let indexer_config = IndexerConfig::default();
        let index_documents_config = IndexDocumentsConfig::default();
        Self { inner, indexer_config, index_documents_config, _tempdir }
    }

    /// Creates a temporary index, with a default `4096 * 2000` size. This should be enough for
    /// most tests.
    pub fn new() -> Self {
        Self::new_with_map_size(4096 * 2000)
    }

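    /// Adds the given memory-mapped documents payload inside the provided write
    /// transaction. The payload goes through `indexer::DocumentOperation` and
    /// `indexer::index`, honoring `index_documents_config.update_method`, and
    /// runs on the configured thread pool when one is set (a local
    /// `ThreadPoolNoAbort` is built otherwise).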
    pub fn add_documents_using_wtxn<'t>(
        &'t self,
        wtxn: &mut RwTxn<'t>,
        documents: Mmap,
    ) -> Result<(), crate::error::Error> {
        let local_pool;
        let indexer_config = &self.indexer_config;
        let pool = match &indexer_config.thread_pool {
            Some(pool) => pool,
            None => {
                local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
                &local_pool
            }
        };

        let rtxn = self.inner.read_txn()?;
        let db_fields_ids_map = self.inner.fields_ids_map(&rtxn)?;
        let mut new_fields_ids_map = db_fields_ids_map.clone();

        let embedders = InnerIndexSettings::from_index(&self.inner, &rtxn, None)?.embedding_configs;
        let mut indexer = indexer::DocumentOperation::new();
        match self.index_documents_config.update_method {
            IndexDocumentsMethod::ReplaceDocuments => {
                indexer.replace_documents(&documents).unwrap()
            }
            IndexDocumentsMethod::UpdateDocuments => indexer.update_documents(&documents).unwrap(),
        }

        let indexer_alloc = Bump::new();
        let (document_changes, operation_stats, primary_key) = indexer.into_changes(
            &indexer_alloc,
            &self.inner,
            &rtxn,
            None,
            &mut new_fields_ids_map,
            &|| false,
            Progress::default(),
        )?;

        if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
            return Err(error.into());
        }

        pool.install(|| {
            indexer::index(
                wtxn,
                &self.inner,
                &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
                indexer_config.grenad_parameters(),
                &db_fields_ids_map,
                new_fields_ids_map,
                primary_key,
                &document_changes,
                embedders,
                &|| false,
                &Progress::default(),
            )
        })
        .unwrap()?;

        Ok(())
    }

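    /// Convenience wrapper around [`Self::add_documents_using_wtxn`] that opens
    /// its own write transaction and commits it on success.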
    pub fn add_documents(&self, documents: Mmap) -> Result<(), crate::error::Error> {
        let mut wtxn = self.write_txn().unwrap();
        self.add_documents_using_wtxn(&mut wtxn, documents)?;
        wtxn.commit().unwrap();
        Ok(())
    }

    pub fn update_settings(
        &self,
        update: impl Fn(&mut Settings<'_, '_, '_>),
    ) -> Result<(), crate::error::Error> {
        let mut wtxn = self.write_txn().unwrap();
        self.update_settings_using_wtxn(&mut wtxn, update)?;
        wtxn.commit().unwrap();
        Ok(())
    }

    pub fn update_settings_using_wtxn<'t>(
        &'t self,
        wtxn: &mut RwTxn<'t>,
        update: impl Fn(&mut Settings<'_, '_, '_>),
    ) -> Result<(), crate::error::Error> {
        let mut builder = update::Settings::new(wtxn, &self.inner, &self.indexer_config);
        update(&mut builder);
        builder.execute(drop, || false)?;
        Ok(())
    }

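    /// Deletes the documents matching the given external ids inside the provided
    /// write transaction, going through the same `DocumentOperation` pipeline as
    /// the additions above.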
    pub fn delete_documents_using_wtxn<'t>(
        &'t self,
        wtxn: &mut RwTxn<'t>,
        external_document_ids: Vec<String>,
    ) -> Result<(), crate::error::Error> {
        let local_pool;
        let indexer_config = &self.indexer_config;
        let pool = match &indexer_config.thread_pool {
            Some(pool) => pool,
            None => {
                local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
                &local_pool
            }
        };

        let rtxn = self.inner.read_txn()?;
        let db_fields_ids_map = self.inner.fields_ids_map(&rtxn)?;
        let mut new_fields_ids_map = db_fields_ids_map.clone();

        let embedders = InnerIndexSettings::from_index(&self.inner, &rtxn, None)?.embedding_configs;

        let mut indexer = indexer::DocumentOperation::new();
        let external_document_ids: Vec<_> =
            external_document_ids.iter().map(AsRef::as_ref).collect();
        indexer.delete_documents(external_document_ids.as_slice());

        let indexer_alloc = Bump::new();
        let (document_changes, operation_stats, primary_key) = indexer.into_changes(
            &indexer_alloc,
            &self.inner,
            &rtxn,
            None,
            &mut new_fields_ids_map,
            &|| false,
            Progress::default(),
        )?;

        if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
            return Err(error.into());
        }

        pool.install(|| {
            indexer::index(
                wtxn,
                &self.inner,
                &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
                indexer_config.grenad_parameters(),
                &db_fields_ids_map,
                new_fields_ids_map,
                primary_key,
                &document_changes,
                embedders,
                &|| false,
                &Progress::default(),
            )
        })
        .unwrap()?;

        Ok(())
    }

    pub fn delete_documents(&self, external_document_ids: Vec<String>) {
        let mut wtxn = self.write_txn().unwrap();

        self.delete_documents_using_wtxn(&mut wtxn, external_document_ids).unwrap();

        wtxn.commit().unwrap();
    }

    pub fn delete_document(&self, external_document_id: &str) {
        self.delete_documents(vec![external_document_id.to_string()])
    }
}

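// A minimal end-to-end sketch of how the helpers above are meant to be used:
// build a `TempIndex`, push documents with the `documents!` macro (in scope for
// the tests below as well), and run a plain word query. It only reuses calls
// already exercised by the tests in this module.
#[test]
fn temp_index_usage_sketch() {
    let index = TempIndex::new();
    index
        .add_documents(documents!([
            { "id": 1, "name": "kevin" },
            { "id": 2, "name": "bob" },
        ]))
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    let mut search = index.search(&rtxn);
    let results = search.query("kevin").execute().unwrap();
    // Only the document containing the word "kevin" should match.
    assert_eq!(results.documents_ids, vec![0]);
}
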
#[test]
fn aborting_indexation() {
    use std::sync::atomic::AtomicBool;
    use std::sync::atomic::Ordering::Relaxed;

    let index = TempIndex::new();
    let mut wtxn = index.inner.write_txn().unwrap();
    let should_abort = AtomicBool::new(false);

    let local_pool;
    let indexer_config = &index.indexer_config;
    let pool = match &indexer_config.thread_pool {
        Some(pool) => pool,
        None => {
            local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap();
            &local_pool
        }
    };

    let rtxn = index.inner.read_txn().unwrap();
    let db_fields_ids_map = index.inner.fields_ids_map(&rtxn).unwrap();
    let mut new_fields_ids_map = db_fields_ids_map.clone();

    let embedders = EmbeddingConfigs::default();
    let mut indexer = indexer::DocumentOperation::new();
    let payload = documents!([
        { "id": 1, "name": "kevin" },
        { "id": 2, "name": "bob", "age": 20 },
        { "id": 2, "name": "bob", "age": 20 },
    ]);
    indexer.replace_documents(&payload).unwrap();

    let indexer_alloc = Bump::new();
    let (document_changes, _operation_stats, primary_key) = indexer
        .into_changes(
            &indexer_alloc,
            &index.inner,
            &rtxn,
            None,
            &mut new_fields_ids_map,
            &|| false,
            Progress::default(),
        )
        .unwrap();

    should_abort.store(true, Relaxed);

    let err = pool
        .install(|| {
            indexer::index(
                &mut wtxn,
                &index.inner,
                &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
                indexer_config.grenad_parameters(),
                &db_fields_ids_map,
                new_fields_ids_map,
                primary_key,
                &document_changes,
                embedders,
                &|| should_abort.load(Relaxed),
                &Progress::default(),
            )
        })
        .unwrap()
        .unwrap_err();

    assert!(matches!(err, Error::InternalError(InternalError::AbortedIndexation)));
}

#[test]
fn initial_field_distribution() {
    let index = TempIndex::new();
    index
        .add_documents(documents!([
            { "id": 1, "name": "kevin" },
            { "id": 2, "name": "bob", "age": 20 },
            { "id": 2, "name": "bob", "age": 20 },
        ]))
        .unwrap();

    db_snap!(index, field_distribution, @r###"
    age 1 |
    id 2 |
    name 2 |
    "###);

    db_snap!(index, word_docids,
        @r###"
    1 [0, ]
    2 [1, ]
    20 [1, ]
    bob [1, ]
    kevin [0, ]
    "###
    );

    // We add all the documents a second time; we should end up with the same
    // field_distribution.
    index
        .add_documents(documents!([
            { "id": 1, "name": "kevin" },
            { "id": 2, "name": "bob", "age": 20 },
            { "id": 2, "name": "bob", "age": 20 },
        ]))
        .unwrap();

    db_snap!(index, field_distribution,
        @r###"
    age 1 |
    id 2 |
    name 2 |
    "###
    );

    // Then we update one document by removing a field and another one by adding a field.
    index
        .add_documents(documents!([
            { "id": 1, "name": "kevin", "has_dog": true },
            { "id": 2, "name": "bob" }
        ]))
        .unwrap();

    db_snap!(index, field_distribution,
        @r###"
    has_dog 1 |
    id 2 |
    name 2 |
    "###
    );
}

#[test]
fn put_and_retrieve_disable_typo() {
    let index = TempIndex::new();
    let mut txn = index.write_txn().unwrap();
    // default value is true
    assert!(index.authorize_typos(&txn).unwrap());
    // set to false
    index.put_authorize_typos(&mut txn, false).unwrap();
    txn.commit().unwrap();

    let txn = index.read_txn().unwrap();
    assert!(!index.authorize_typos(&txn).unwrap());
}

#[test]
fn set_min_word_len_for_typos() {
    let index = TempIndex::new();
    let mut txn = index.write_txn().unwrap();

    assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_ONE_TYPO);
    assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_TWO_TYPOS);

    index.put_min_word_len_one_typo(&mut txn, 3).unwrap();
    index.put_min_word_len_two_typos(&mut txn, 15).unwrap();

    txn.commit().unwrap();

    let txn = index.read_txn().unwrap();
    assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), 3);
    assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), 15);
}

#[test]
fn add_documents_and_set_searchable_fields() {
    let index = TempIndex::new();
    index
        .add_documents(documents!([
            { "id": 1, "doggo": "kevin" },
            { "id": 2, "doggo": { "name": "bob", "age": 20 } },
            { "id": 3, "name": "jean", "age": 25 },
        ]))
        .unwrap();
    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("doggo"), S("name")]);
        })
        .unwrap();

    // ensure we get the right real searchable fields + user defined searchable fields
    let rtxn = index.read_txn().unwrap();

    let real = index.searchable_fields(&rtxn).unwrap();
    assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);

    let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
    assert_eq!(user_defined, &["doggo", "name"]);
}

#[test]
fn set_searchable_fields_and_add_documents() {
    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("doggo"), S("name")]);
        })
        .unwrap();

    // ensure we get the right real searchable fields + user defined searchable fields
    let rtxn = index.read_txn().unwrap();

    let real = index.searchable_fields(&rtxn).unwrap();
    assert!(real.is_empty());
    let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
    assert_eq!(user_defined, &["doggo", "name"]);

    index
        .add_documents(documents!([
            { "id": 1, "doggo": "kevin" },
            { "id": 2, "doggo": { "name": "bob", "age": 20 } },
            { "id": 3, "name": "jean", "age": 25 },
        ]))
        .unwrap();

    // ensure we get the right real searchable fields + user defined searchable fields
    let rtxn = index.read_txn().unwrap();

    let real = index.searchable_fields(&rtxn).unwrap();
    assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);

    let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
    assert_eq!(user_defined, &["doggo", "name"]);
}

#[test]
fn test_basic_geo_bounding_box() {
    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
                RESERVED_GEO_FIELD_NAME.to_string(),
            )]);
        })
        .unwrap();
    index
        .add_documents(documents!([
            { "id": 0, RESERVED_GEO_FIELD_NAME: { "lat": "0", "lng": "0" } },
            { "id": 1, RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": "-175" } },
            { "id": 2, RESERVED_GEO_FIELD_NAME: { "lat": "0", "lng": 175 } },
            { "id": 3, RESERVED_GEO_FIELD_NAME: { "lat": 85, "lng": 0 } },
            { "id": 4, RESERVED_GEO_FIELD_NAME: { "lat": "-85", "lng": "0" } },
        ]))
        .unwrap();

    // run the _geoBoundingBox filters below on a fresh read transaction
    let rtxn = index.read_txn().unwrap();
    let mut search = index.search(&rtxn);

    // exactly match a document
    let search_result = search
        .filter(Filter::from_str("_geoBoundingBox([0, 0], [0, 0])").unwrap().unwrap())
        .execute()
        .unwrap();
    insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0]>");

    // match a document in the middle of the rectangle
    let search_result = search
        .filter(Filter::from_str("_geoBoundingBox([10, 10], [-10, -10])").unwrap().unwrap())
        .execute()
        .unwrap();
    insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0]>");

    // select everything
    let search_result = search
        .filter(Filter::from_str("_geoBoundingBox([90, 180], [-90, -180])").unwrap().unwrap())
        .execute()
        .unwrap();
    insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0, 1, 2, 3, 4]>");

    // go on one edge of the longitude
    let search_result = search
        .filter(Filter::from_str("_geoBoundingBox([0, -170], [0, 180])").unwrap().unwrap())
        .execute()
        .unwrap();
    insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[1]>");

    // go on the other edge of the longitude
    let search_result = search
        .filter(Filter::from_str("_geoBoundingBox([0, -180], [0, 170])").unwrap().unwrap())
        .execute()
        .unwrap();
    insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[2]>");

    // wrap around the longitude
    let search_result = search
        .filter(Filter::from_str("_geoBoundingBox([0, -170], [0, 170])").unwrap().unwrap())
        .execute()
        .unwrap();
    insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[1, 2]>");

    // go on one edge of the latitude
    let search_result = search
        .filter(Filter::from_str("_geoBoundingBox([90, 0], [80, 0])").unwrap().unwrap())
        .execute()
        .unwrap();
    insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[3]>");

    // go on the other edge of the latitude
    let search_result = search
        .filter(Filter::from_str("_geoBoundingBox([-80, 0], [-90, 0])").unwrap().unwrap())
        .execute()
        .unwrap();
    insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[4]>");

    // the requests that don't make sense

    // try to wrap around the latitude
    let error = search
        .filter(Filter::from_str("_geoBoundingBox([-80, 0], [80, 0])").unwrap().unwrap())
        .execute()
        .unwrap_err();
    insta::assert_snapshot!(
        error,
        @r###"
    The top latitude `-80` is below the bottom latitude `80`.
    32:33 _geoBoundingBox([-80, 0], [80, 0])
    "###
    );

    // send a top latitude lower than the bottom latitude
    let error = search
        .filter(Filter::from_str("_geoBoundingBox([-10, 0], [10, 0])").unwrap().unwrap())
        .execute()
        .unwrap_err();
    insta::assert_snapshot!(
        error,
        @r###"
    The top latitude `-10` is below the bottom latitude `10`.
    32:33 _geoBoundingBox([-10, 0], [10, 0])
    "###
    );
}

#[test]
fn test_contains() {
    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings
                .set_filterable_fields(vec![FilterableAttributesRule::Field("doggo".to_string())]);
        })
        .unwrap();
    index
        .add_documents(documents!([
            { "id": 0, "doggo": "kefir" },
            { "id": 1, "doggo": "kefirounet" },
            { "id": 2, "doggo": "kefkef" },
            { "id": 3, "doggo": "fifir" },
            { "id": 4, "doggo": "boubou" },
            { "id": 5 },
        ]))
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    let mut search = index.search(&rtxn);
    let search_result = search
        .filter(Filter::from_str("doggo CONTAINS kefir").unwrap().unwrap())
        .execute()
        .unwrap();
    insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0, 1]>");
    let mut search = index.search(&rtxn);
    let search_result =
        search.filter(Filter::from_str("doggo CONTAINS KEF").unwrap().unwrap()).execute().unwrap();
    insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0, 1, 2]>");
    let mut search = index.search(&rtxn);
    let search_result = search
        .filter(Filter::from_str("doggo NOT CONTAINS fir").unwrap().unwrap())
        .execute()
        .unwrap();
    insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[2, 4, 5]>");
}

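// Replacing documents must keep the external ids, the internal ids, and the
// facet databases consistent: no two external ids may end up pointing at the
// same internal document id, and the facet values of replaced documents must
// follow the replacement.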
#[test]
fn replace_documents_external_ids_and_soft_deletion_check() {
    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings.set_primary_key("id".to_owned());
            settings
                .set_filterable_fields(vec![FilterableAttributesRule::Field("doggo".to_string())]);
        })
        .unwrap();

    let mut docs = vec![];
    for i in 0..4 {
        docs.push(serde_json::json!(
            { "id": i, "doggo": i }
        ));
    }
    index.add_documents(documents!(docs)).unwrap();

    db_snap!(index, documents_ids, @"[0, 1, 2, 3, ]");
    db_snap!(index, external_documents_ids, 1, @r###"
    docids:
    0 0
    1 1
    2 2
    3 3
    "###);
    db_snap!(index, facet_id_f64_docids, 1, @r###"
    1 0 0 1 [0, ]
    1 0 1 1 [1, ]
    1 0 2 1 [2, ]
    1 0 3 1 [3, ]
    "###);

    let mut docs = vec![];
    for i in 0..3 {
        docs.push(serde_json::json!(
            { "id": i, "doggo": i + 1 }
        ));
    }
    index.add_documents(documents!(docs)).unwrap();

    db_snap!(index, documents_ids, @"[0, 1, 2, 3, ]");
    db_snap!(index, external_documents_ids, 2, @r###"
    docids:
    0 0
    1 1
    2 2
    3 3
    "###);
    db_snap!(index, facet_id_f64_docids, 2, @r###"
    1 0 1 1 [0, ]
    1 0 2 1 [1, ]
    1 0 3 1 [2, 3, ]
    "###);

    index
        .add_documents(
            documents!([{ "id": 3, "doggo": 4 }, { "id": 3, "doggo": 5 }, { "id": 3, "doggo": 4 }]),
        )
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, 2, 3, ]");
    db_snap!(index, external_documents_ids, 3, @r###"
    docids:
    0 0
    1 1
    2 2
    3 3
    "###);
    db_snap!(index, facet_id_f64_docids, 3, @r###"
    1 0 1 1 [0, ]
    1 0 2 1 [1, ]
    1 0 3 1 [2, ]
    1 0 4 1 [3, ]
    "###);

    index
        .update_settings(|settings| {
            settings.set_distinct_field("id".to_owned());
        })
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, 2, 3, ]");
    db_snap!(index, external_documents_ids, 3, @r###"
    docids:
    0 0
    1 1
    2 2
    3 3
    "###);
    db_snap!(index, facet_id_f64_docids, 3, @r###"
    0 0 0 1 [0, ]
    0 0 1 1 [1, ]
    0 0 2 1 [2, ]
    0 0 3 1 [3, ]
    1 0 1 1 [0, ]
    1 0 2 1 [1, ]
    1 0 3 1 [2, ]
    1 0 4 1 [3, ]
    "###);
}

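// Scenario: add documents, delete one by its external id, change the settings,
// then re-add a document with the deleted external id. The addition must create
// a fresh entry instead of being interpreted as a replacement of a stale id.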
#[test]
fn bug_3021_first() {
    // https://github.com/meilisearch/meilisearch/issues/3021
    let mut index = TempIndex::new();
    index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;

    index
        .update_settings(|settings| {
            settings.set_primary_key("primary_key".to_owned());
        })
        .unwrap();

    index
        .add_documents(documents!([
            { "primary_key": 38 },
            { "primary_key": 34 }
        ]))
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, ]");
    db_snap!(index, external_documents_ids, 1, @r###"
    docids:
    34 1
    38 0
    "###);

    index.delete_document("34");

    db_snap!(index, documents_ids, @"[0, ]");
    db_snap!(index, external_documents_ids, 2, @r###"
    docids:
    38 0
    "###);

    index
        .update_settings(|s| {
            s.set_searchable_fields(vec![]);
        })
        .unwrap();

    // The key point of the test is to verify that the external documents ids
    // do not contain any entry for previously soft-deleted document ids.
    db_snap!(index, documents_ids, @"[0, ]");
    db_snap!(index, external_documents_ids, 3, @r###"
    docids:
    38 0
    "###);

    // So this document addition now works correctly; before the fix it would
    // have been wrongly interpreted as a replacement.
    index.add_documents(documents!({ "primary_key": 34 })).unwrap();

    db_snap!(index, documents_ids, @"[0, 1, ]");
    db_snap!(index, external_documents_ids, 4, @r###"
    docids:
    34 1
    38 0
    "###);

    // We run the test again, but this time deleting the document with internal id 0
    // instead of id 1.
    index.delete_document("38");

    db_snap!(index, documents_ids, @"[1, ]");
    db_snap!(index, external_documents_ids, 5, @r###"
    docids:
    34 1
    "###);

    index
        .update_settings(|s| {
            s.set_searchable_fields(vec!["primary_key".to_owned()]);
        })
        .unwrap();

    db_snap!(index, documents_ids, @"[1, ]");
    db_snap!(index, external_documents_ids, 6, @r###"
    docids:
    34 1
    "###);

    // And adding lots of documents afterwards instead of just one.
    // These extra subtests don't add much, but it's better than nothing.
    index
        .add_documents(documents!([
            { "primary_key": 38 },
            { "primary_key": 39 },
            { "primary_key": 41 },
            { "primary_key": 40 },
            { "primary_key": 41 },
            { "primary_key": 42 },
        ]))
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, 2, 3, 4, 5, ]");
    db_snap!(index, external_documents_ids, 7, @r###"
    docids:
    34 1
    38 0
    39 2
    40 4
    41 3
    42 5
    "###);
}

#[test]
fn simple_delete() {
    let mut index = TempIndex::new();
    index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
    index
        .add_documents(documents!([
            { "id": 30 },
            { "id": 34 }
        ]))
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, ]");
    db_snap!(index, external_documents_ids, 1, @r###"
    docids:
    30 0
    34 1"###);

    index.delete_document("34");

    db_snap!(index, documents_ids, @"[0, ]");
    db_snap!(index, external_documents_ids, 2, @r###"
    docids:
    30 0
    "###);
}

#[test]
fn bug_3021_second() {
    // https://github.com/meilisearch/meilisearch/issues/3021
    let mut index = TempIndex::new();
    index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;

    index
        .update_settings(|settings| {
            settings.set_primary_key("primary_key".to_owned());
        })
        .unwrap();

    index
        .add_documents(documents!([
            { "primary_key": 30 },
            { "primary_key": 34 }
        ]))
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, ]");
    db_snap!(index, external_documents_ids, 1, @r###"
    docids:
    30 0
    34 1
    "###);

    index.delete_document("34");

    db_snap!(index, documents_ids, @"[0, ]");
    db_snap!(index, external_documents_ids, 2, @r###"
    docids:
    30 0
    "###);

    index
        .update_settings(|s| {
            s.set_searchable_fields(vec![]);
        })
        .unwrap();

    // The key point of the test is to verify that the external documents ids
    // do not contain any entry for previously soft-deleted document ids.
    db_snap!(index, documents_ids, @"[0, ]");
    db_snap!(index, external_documents_ids, 3, @r###"
    docids:
    30 0
    "###);

    // So that when we add a new document, it can safely reuse the freed internal id.
    index.add_documents(documents!({ "primary_key": 35, "b": 2 })).unwrap();

    db_snap!(index, documents_ids, @"[0, 1, ]");
    // The external documents ids don't have several external ids pointing to the same
    // internal document id.
    db_snap!(index, external_documents_ids, 4, @r###"
    docids:
    30 0
    35 1
    "###);

    // And when we add 34 again, we don't replace document 35.
    index.add_documents(documents!({ "primary_key": 34, "a": 1 })).unwrap();

    // And document 35 still exists: it was not deleted.
    db_snap!(index, documents_ids, @"[0, 1, 2, ]");
    db_snap!(index, external_documents_ids, 5, @r###"
    docids:
    30 0
    34 2
    35 1
    "###);

    let rtxn = index.read_txn().unwrap();
    let (_docid, obkv) = index.documents(&rtxn, [0]).unwrap()[0];
    let json = obkv_to_json(&[0, 1, 2], &index.fields_ids_map(&rtxn).unwrap(), obkv).unwrap();
    insta::assert_debug_snapshot!(json, @r###"
    {
        "primary_key": Number(30),
    }
    "###);

    // Furthermore, when we retrieve document 34, it is not the result of merging 35 with 34.
    let (_docid, obkv) = index.documents(&rtxn, [2]).unwrap()[0];
    let json = obkv_to_json(&[0, 1, 2], &index.fields_ids_map(&rtxn).unwrap(), obkv).unwrap();
    insta::assert_debug_snapshot!(json, @r###"
    {
        "primary_key": Number(34),
        "a": Number(1),
    }
    "###);

    drop(rtxn);

    // Add new documents again.
    index
        .add_documents(
            documents!([{ "primary_key": 37 }, { "primary_key": 38 }, { "primary_key": 39 }]),
        )
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, 2, 3, 4, 5, ]");
    db_snap!(index, external_documents_ids, 6, @r###"
    docids:
    30 0
    34 2
    35 1
    37 3
    38 4
    39 5
    "###);
}

#[test]
fn bug_3021_third() {
    // https://github.com/meilisearch/meilisearch/issues/3021
    let mut index = TempIndex::new();
    index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;

    index
        .update_settings(|settings| {
            settings.set_primary_key("primary_key".to_owned());
        })
        .unwrap();

    index
        .add_documents(documents!([
            { "primary_key": 3 },
            { "primary_key": 4 },
            { "primary_key": 5 }
        ]))
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, 2, ]");
    db_snap!(index, external_documents_ids, 1, @r###"
    docids:
    3 0
    4 1
    5 2
    "###);

    index.delete_document("3");

    db_snap!(index, documents_ids, @"[1, 2, ]");
    db_snap!(index, external_documents_ids, 2, @r###"
    docids:
    4 1
    5 2
    "###);

    index.add_documents(documents!([{ "primary_key": "4", "a": 2 }])).unwrap();

    db_snap!(index, documents_ids, @"[1, 2, ]");
    db_snap!(index, external_documents_ids, 2, @r###"
    docids:
    4 1
    5 2
    "###);

    index
        .add_documents(documents!([
            { "primary_key": "3" },
        ]))
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, 2, ]");
    db_snap!(index, external_documents_ids, 2, @r###"
    docids:
    3 0
    4 1
    5 2
    "###);
}

#[test]
fn bug_3021_fourth() {
    // https://github.com/meilisearch/meilisearch/issues/3021
    let mut index = TempIndex::new();
    index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;

    index
        .update_settings(|settings| {
            settings.set_primary_key("primary_key".to_owned());
        })
        .unwrap();

    index
        .add_documents(documents!([
            { "primary_key": 11 },
            { "primary_key": 4 },
        ]))
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, ]");
    db_snap!(index, external_documents_ids, @r###"
    docids:
    11 0
    4 1
    "###);
    db_snap!(index, fields_ids_map, @r###"
    0 primary_key |
    "###);
    db_snap!(index, searchable_fields, @r###"["primary_key"]"###);
    db_snap!(index, fieldids_weights_map, @r###"
    fid weight
    0 0 |
    "###);

    index
        .add_documents(documents!([
            { "primary_key": 4, "a": 0 },
            { "primary_key": 1 },
        ]))
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, 2, ]");
    db_snap!(index, external_documents_ids, @r###"
    docids:
    1 2
    11 0
    4 1
    "###);
    db_snap!(index, fields_ids_map, @r###"
    0 primary_key |
    1 a |
    "###);
    db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
    db_snap!(index, fieldids_weights_map, @r###"
    fid weight
    0 0 |
    1 0 |
    "###);

    index.delete_documents(Default::default());

    db_snap!(index, documents_ids, @"[0, 1, 2, ]");
    db_snap!(index, external_documents_ids, @r###"
    docids:
    1 2
    11 0
    4 1
    "###);
    db_snap!(index, fields_ids_map, @r###"
    0 primary_key |
    1 a |
    "###);
    db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
    db_snap!(index, fieldids_weights_map, @r###"
    fid weight
    0 0 |
    1 0 |
    "###);

    index
        .add_documents(documents!([
            { "primary_key": 4, "a": 1 },
            { "primary_key": 1, "a": 0 },
        ]))
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, 2, ]");
    db_snap!(index, external_documents_ids, @r###"
    docids:
    1 2
    11 0
    4 1
    "###);
    db_snap!(index, fields_ids_map, @r###"
    0 primary_key |
    1 a |
    "###);
    db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
    db_snap!(index, fieldids_weights_map, @r###"
    fid weight
    0 0 |
    1 0 |
    "###);

    let rtxn = index.read_txn().unwrap();
    let search = Search::new(&rtxn, &index);
    let SearchResult {
        matching_words: _,
        candidates: _,
        document_scores: _,
        mut documents_ids,
        degraded: _,
        used_negative_operator: _,
    } = search.execute().unwrap();
    let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
    documents_ids.sort_unstable();
    let docs = index.documents(&rtxn, documents_ids).unwrap();
    let mut all_ids = HashSet::new();
    for (_docid, obkv) in docs {
        let id = obkv.get(primary_key_id).unwrap();
        assert!(all_ids.insert(id));
    }
}

#[test]
fn bug_3007() {
    // https://github.com/meilisearch/meilisearch/issues/3007

    use crate::error::{GeoError, UserError};
    let index = TempIndex::new();

    // Given an index with a geo field NOT contained in the sortable_fields of the settings
    index
        .update_settings(|settings| {
            settings.set_primary_key("id".to_string());
            settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
                RESERVED_GEO_FIELD_NAME.to_string(),
            )]);
        })
        .unwrap();

    // happy path
    index
        .add_documents(documents!({ "id" : 5, RESERVED_GEO_FIELD_NAME: {"lat": 12.0, "lng": 11.0}}))
        .unwrap();

    db_snap!(index, geo_faceted_documents_ids);

    // both coordinates are unparseable, so we expect a GeoError::BadLatitudeAndLongitude
    let err1 = index
        .add_documents(
            documents!({ "id" : 6, RESERVED_GEO_FIELD_NAME: {"lat": "unparseable", "lng": "unparseable"}}),
        )
        .unwrap_err();
    match err1 {
        Error::UserError(UserError::InvalidGeoField(err)) => match *err {
            GeoError::BadLatitudeAndLongitude { .. } => (),
            otherwise => {
                panic!("err1 is not a BadLatitudeAndLongitude error but rather a {otherwise:?}")
            }
        },
        _ => panic!("err1 is not a BadLatitudeAndLongitude error but rather a {err1:?}"),
    }

    db_snap!(index, geo_faceted_documents_ids); // ensure that no more documents were inserted
}

#[test]
fn unexpected_extra_fields_in_geo_field() {
    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings.set_primary_key("id".to_string());
            settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
                RESERVED_GEO_FIELD_NAME.to_string(),
            )]);
        })
        .unwrap();

    let err = index
        .add_documents(
            documents!({ "id" : "doggo", RESERVED_GEO_FIELD_NAME: { "lat": 1, "lng": 2, "doggo": "are the best" }}),
        )
        .unwrap_err();
    insta::assert_snapshot!(err, @r###"The `_geo` field in the document with the id: `"doggo"` contains the following unexpected fields: `{"doggo":"are the best"}`."###);

    db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted

    // multiple fields and complex values
    let err = index
        .add_documents(
            documents!({ "id" : "doggo", RESERVED_GEO_FIELD_NAME: { "lat": 1, "lng": 2, "doggo": "are the best", "and": { "all": ["cats", { "are": "beautiful" } ] } } }),
        )
        .unwrap_err();
    insta::assert_snapshot!(err, @r###"The `_geo` field in the document with the id: `"doggo"` contains the following unexpected fields: `{"and":{"all":["cats",{"are":"beautiful"}]},"doggo":"are the best"}`."###);

    db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted
}

#[test]
fn swapping_searchable_attributes() {
    // See https://github.com/meilisearch/meilisearch/issues/4484

    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("name")]);
            settings
                .set_filterable_fields(vec![FilterableAttributesRule::Field("age".to_string())]);
        })
        .unwrap();

    index
        .add_documents(documents!({ "id": 1, "name": "Many", "age": 28, "realName": "Maxime" }))
        .unwrap();
    db_snap!(index, fields_ids_map, @r###"
    0 id |
    1 name |
    2 age |
    3 realName |
    "###);
    db_snap!(index, searchable_fields, @r###"["name"]"###);
    db_snap!(index, fieldids_weights_map, @r###"
    fid weight
    1 0 |
    "###);

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("name"), S("realName")]);
            settings
                .set_filterable_fields(vec![FilterableAttributesRule::Field("age".to_string())]);
        })
        .unwrap();

    // The order of the field id map shouldn't change
    db_snap!(index, fields_ids_map, @r###"
    0 id |
    1 name |
    2 age |
    3 realName |
    "###);
    db_snap!(index, searchable_fields, @r###"["name", "realName"]"###);
    db_snap!(index, fieldids_weights_map, @r###"
    fid weight
    1 0 |
    3 1 |
    "###);
}

#[test]
fn attribute_weights_after_swapping_searchable_attributes() {
    // See https://github.com/meilisearch/meilisearch/issues/4484

    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("name"), S("beverage")]);
        })
        .unwrap();

    index
        .add_documents(documents!([
            { "id": 0, "name": "kefir", "beverage": "water" },
            { "id": 1, "name": "tamo", "beverage": "kefir" }
        ]))
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    let mut search = index.search(&rtxn);
    let results = search.query("kefir").execute().unwrap();

    // We should find kefir the dog first
    insta::assert_debug_snapshot!(results.documents_ids, @r###"
    [
        0,
        1,
    ]
    "###);

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("beverage"), S("name")]);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    let mut search = index.search(&rtxn);
    let results = search.query("kefir").execute().unwrap();

    // We should find tamo first
    insta::assert_debug_snapshot!(results.documents_ids, @r###"
    [
        1,
        0,
    ]
    "###);
}

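// The `_vectors` field is reserved for embeddings: it must never end up in the
// searchable or filterable attributes, even when the settings explicitly list
// it or when an embedder with the same name is configured.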
#[test]
fn vectors_are_never_indexed_as_searchable_or_filterable() {
    let index = TempIndex::new();

    index
        .add_documents(documents!([
            { "id": 0, "_vectors": { "doggo": [2345] } },
            { "id": 1, "_vectors": { "doggo": [6789] } },
        ]))
        .unwrap();

    db_snap!(index, fields_ids_map, @r###"
    0 id |
    1 _vectors |
    "###);
    db_snap!(index, searchable_fields, @r###"["id"]"###);
    db_snap!(index, fieldids_weights_map, @r###"
    fid weight
    0 0 |
    "###);

    let rtxn = index.read_txn().unwrap();
    let mut search = index.search(&rtxn);
    let results = search.query("2345").execute().unwrap();
    assert!(results.candidates.is_empty());
    drop(rtxn);

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("_vectors"), S("_vectors.doggo")]);
            settings.set_filterable_fields(vec![
                FilterableAttributesRule::Field("_vectors".to_string()),
                FilterableAttributesRule::Field("_vectors.doggo".to_string()),
            ]);
        })
        .unwrap();

    db_snap!(index, fields_ids_map, @r###"
    0 id |
    1 _vectors |
    "###);
    db_snap!(index, searchable_fields, @"[]");
    db_snap!(index, fieldids_weights_map, @r###"
    fid weight
    "###);

    let rtxn = index.read_txn().unwrap();
    let mut search = index.search(&rtxn);
    let results = search.query("2345").execute().unwrap();
    assert!(results.candidates.is_empty());

    let mut search = index.search(&rtxn);
    let results = search
        .filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap())
        .execute()
        .unwrap();
    assert!(results.candidates.is_empty());

    index
        .update_settings(|settings| {
            settings.set_embedder_settings(btreemap! {
                S("doggo") => Setting::Set(EmbeddingSettings {
                    dimensions: Setting::Set(1),
                    source: Setting::Set(EmbedderSource::UserProvided),
                    ..EmbeddingSettings::default()
                }),
            });
        })
        .unwrap();

    db_snap!(index, fields_ids_map, @r###"
    0 id |
    1 _vectors |
    "###);
    db_snap!(index, searchable_fields, @"[]");
    db_snap!(index, fieldids_weights_map, @r###"
    fid weight
    "###);

    let rtxn = index.read_txn().unwrap();
    let mut search = index.search(&rtxn);
    let results = search.query("2345").execute().unwrap();
    assert!(results.candidates.is_empty());

    let mut search = index.search(&rtxn);
    let results = search
        .filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap())
        .execute()
        .unwrap();
    assert!(results.candidates.is_empty());
}