From bddc168d837929f79c981315a16eeaff8c96c03d Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 23 Nov 2023 14:43:52 +0100 Subject: [PATCH 1/7] List TODOs --- milli/src/search/new/db_cache.rs | 3 +++ milli/src/update/index_documents/extract/mod.rs | 2 ++ milli/src/update/settings.rs | 2 ++ 3 files changed, 7 insertions(+) diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index 24c7d5076..b7a74fb62 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -263,6 +263,7 @@ impl<'ctx> SearchContext<'ctx> { word2: Interned, proximity: u8, ) -> Result> { + // TODO: if database is empty, search if the word are in the same attribute instead DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( self.txn, (proximity, word1, word2), @@ -282,6 +283,7 @@ impl<'ctx> SearchContext<'ctx> { word2: Interned, proximity: u8, ) -> Result> { + // TODO: if database is empty, search if the word are in the same attribute instead DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>( self.txn, (proximity, word1, word2), @@ -301,6 +303,7 @@ impl<'ctx> SearchContext<'ctx> { prefix2: Interned, proximity: u8, ) -> Result> { + // TODO: if database is empty, search if the word are in the same attribute instead let docids = match self .db_cache .word_prefix_pair_proximity_docids diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 91f3e1c62..366e61c04 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -52,6 +52,7 @@ pub(crate) fn data_from_obkv_documents( dictionary: Option<&[&str]>, max_positions_per_attributes: Option, exact_attributes: HashSet, + // TODO: add a proximity database deactivation parameter. 
) -> Result<()> { puffin::profile_function!(); @@ -150,6 +151,7 @@ pub(crate) fn data_from_obkv_documents( }); } + // TODO: Skip this part if deactivated spawn_extraction_task::<_, _, Vec>>>( docid_word_positions_chunks.clone(), indexer, diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 98697325e..0a069c6df 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -127,6 +127,7 @@ pub struct Settings<'a, 't, 'i> { max_values_per_facet: Setting, sort_facet_values_by: Setting>, pagination_max_total_hits: Setting, + // TODO: add a proximity database deactivation attribute. } impl<'a, 't, 'i> Settings<'a, 't, 'i> { @@ -906,6 +907,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { || synonyms_updated || searchable_updated || exact_attributes_updated + // TODO: reindex if proximity database is activated { self.reindex(&progress_callback, &should_abort, old_fields_ids_map)?; } From 0c3fa8cbc408d923e4c71984eb5404bdbbcc740c Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 6 Dec 2023 14:51:44 +0100 Subject: [PATCH 2/7] Add tests on proximityPrecision setting --- meilisearch/tests/settings/mod.rs | 1 + .../tests/settings/proximity_settings.rs | 396 ++++++++++++++++++ 2 files changed, 397 insertions(+) create mode 100644 meilisearch/tests/settings/proximity_settings.rs diff --git a/meilisearch/tests/settings/mod.rs b/meilisearch/tests/settings/mod.rs index 70125a360..ccb4139e6 100644 --- a/meilisearch/tests/settings/mod.rs +++ b/meilisearch/tests/settings/mod.rs @@ -1,4 +1,5 @@ mod distinct; mod errors; mod get_settings; +mod proximity_settings; mod tokenizer_customization; diff --git a/meilisearch/tests/settings/proximity_settings.rs b/meilisearch/tests/settings/proximity_settings.rs new file mode 100644 index 000000000..d445adbfa --- /dev/null +++ b/meilisearch/tests/settings/proximity_settings.rs @@ -0,0 +1,396 @@ +use meili_snap::{json_string, snapshot}; +use once_cell::sync::Lazy; + +use crate::common::Server; +use 
crate::json; + +static DOCUMENTS: Lazy = Lazy::new(|| { + json!([ + { + "id": 1, + "a": "Soup of the day", + "b": "many the fish", + }, + { + "id": 2, + "a": "Soup of day", + "b": "many the lazy fish", + }, + { + "id": 3, + "a": "the Soup of day", + "b": "many the fish", + }, + ]) +}); + +#[actix_rt::test] +async fn attribute_scale_search() { + let server = Server::new().await; + let (response, code) = server.set_features(json!({"proximityPrecision": true})).await; + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "scoreDetails": false, + "vectorStore": false, + "metrics": false, + "exportPuffinReports": false, + "proximityPrecision": true + } + "###); + let index = server.index("test"); + + index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(0).await; + + let (response, code) = index + .update_settings(json!({ + "proximityPrecision": "attributeScale", + "rankingRules": ["words", "typo", "proximity"], + })) + .await; + assert_eq!("202", code.as_str(), "{:?}", response); + index.wait_task(1).await; + + // the expected order is [1, 3, 2] instead of [3, 1, 2] + // because the attribute scale doesn't make the difference between 1 and 3. + index + .search(json!({"q": "the soup of day"}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "a": "Soup of the day", + "b": "many the fish" + }, + { + "id": 3, + "a": "the Soup of day", + "b": "many the fish" + }, + { + "id": 2, + "a": "Soup of day", + "b": "many the lazy fish" + } + ] + "###); + }) + .await; + + // the expected order is [1, 2, 3] instead of [1, 3, 2] + // because the attribute scale sees all the word in the same attribute + // and so doesn't make the difference between the documents. 
+ index + .search(json!({"q": "many the fish"}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "a": "Soup of the day", + "b": "many the fish" + }, + { + "id": 2, + "a": "Soup of day", + "b": "many the lazy fish" + }, + { + "id": 3, + "a": "the Soup of day", + "b": "many the fish" + } + ] + "###); + }) + .await; +} + +#[actix_rt::test] +async fn attribute_scale_phrase_search() { + let server = Server::new().await; + let (response, code) = server.set_features(json!({"proximityPrecision": true})).await; + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "scoreDetails": false, + "vectorStore": false, + "metrics": false, + "exportPuffinReports": false, + "proximityPrecision": true + } + "###); + let index = server.index("test"); + + index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(0).await; + + let (_response, _code) = index + .update_settings(json!({ + "proximityPrecision": "attributeScale", + "rankingRules": ["words", "typo", "proximity"], + })) + .await; + index.wait_task(1).await; + + // the expected order is [1, 3] instead of [3, 1] + // because the attribute scale doesn't make the difference between 1 and 3. + // But 2 shouldn't be returned because "the" is not in the same attribute. + index + .search(json!({"q": "\"the soup of day\""}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "a": "Soup of the day", + "b": "many the fish" + }, + { + "id": 3, + "a": "the Soup of day", + "b": "many the fish" + } + ] + "###); + }) + .await; + + // the expected order is [1, 2, 3] instead of [1, 3] + // because the attribute scale sees all the word in the same attribute + // and so doesn't make the difference between the documents. 
+ index + .search(json!({"q": "\"many the fish\""}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "a": "Soup of the day", + "b": "many the fish" + }, + { + "id": 2, + "a": "Soup of day", + "b": "many the lazy fish" + }, + { + "id": 3, + "a": "the Soup of day", + "b": "many the fish" + } + ] + "###); + }) + .await; +} + +#[actix_rt::test] +async fn word_scale_set_and_reset() { + let server = Server::new().await; + let (response, code) = server.set_features(json!({"proximityPrecision": true})).await; + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "scoreDetails": false, + "vectorStore": false, + "metrics": false, + "exportPuffinReports": false, + "proximityPrecision": true + } + "###); + let index = server.index("test"); + + index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(0).await; + + // Set and reset the setting ensuring the swap between the 2 settings is applied. 
+ let (_response, _code) = index + .update_settings(json!({ + "proximityPrecision": "attributeScale", + "rankingRules": ["words", "typo", "proximity"], + })) + .await; + index.wait_task(1).await; + + let (_response, _code) = index + .update_settings(json!({ + "proximityPrecision": "wordScale", + "rankingRules": ["words", "typo", "proximity"], + })) + .await; + index.wait_task(2).await; + + // [3, 1, 2] + index + .search(json!({"q": "the soup of day"}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 3, + "a": "the Soup of day", + "b": "many the fish" + }, + { + "id": 1, + "a": "Soup of the day", + "b": "many the fish" + }, + { + "id": 2, + "a": "Soup of day", + "b": "many the lazy fish" + } + ] + "###); + }) + .await; + + // [1, 3, 2] + index + .search(json!({"q": "many the fish"}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "a": "Soup of the day", + "b": "many the fish" + }, + { + "id": 3, + "a": "the Soup of day", + "b": "many the fish" + }, + { + "id": 2, + "a": "Soup of day", + "b": "many the lazy fish" + } + ] + "###); + }) + .await; + + // [3] + index + .search(json!({"q": "\"the soup of day\""}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 3, + "a": "the Soup of day", + "b": "many the fish" + } + ] + "###); + }) + .await; + + // [1, 3] + index + .search(json!({"q": "\"many the fish\""}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "a": "Soup of the day", + "b": "many the fish" + }, + { + "id": 3, + "a": "the Soup of day", + "b": "many the fish" + } + ] + "###); + }) + .await; +} + +#[actix_rt::test] +async fn attribute_scale_default_ranking_rules() { + let server = Server::new().await; + let (response, code) = server.set_features(json!({"proximityPrecision": 
true})).await; + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "scoreDetails": false, + "vectorStore": false, + "metrics": false, + "exportPuffinReports": false, + "proximityPrecision": true + } + "###); + let index = server.index("test"); + + index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(0).await; + + let (response, code) = index + .update_settings(json!({ + "proximityPrecision": "attributeScale" + })) + .await; + assert_eq!("202", code.as_str(), "{:?}", response); + index.wait_task(1).await; + + // the expected order is [3, 1, 2] + index + .search(json!({"q": "the soup of day"}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 3, + "a": "the Soup of day", + "b": "many the fish" + }, + { + "id": 1, + "a": "Soup of the day", + "b": "many the fish" + }, + { + "id": 2, + "a": "Soup of day", + "b": "many the lazy fish" + } + ] + "###); + }) + .await; + + // the expected order is [1, 3, 2] instead of [1, 3] + // because the attribute scale sees all the words in the same attribute + // and so doesn't remove document 2. 
+ index + .search(json!({"q": "\"many the fish\""}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "a": "Soup of the day", + "b": "many the fish" + }, + { + "id": 3, + "a": "the Soup of day", + "b": "many the fish" + }, + { + "id": 2, + "a": "Soup of day", + "b": "many the lazy fish" + } + ] + "###); + }) + .await; +} From 467b49153d7d012380ac045ac4ea7da0fa2ab609 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 6 Dec 2023 15:49:02 +0100 Subject: [PATCH 3/7] Implement proximityPrecision setting on milli side --- milli/src/index.rs | 24 +++ milli/src/proximity.rs | 10 + milli/src/search/new/db_cache.rs | 195 +++++++++++++----- .../src/update/index_documents/extract/mod.rs | 24 ++- milli/src/update/index_documents/mod.rs | 2 + milli/src/update/settings.rs | 35 +++- 6 files changed, 224 insertions(+), 66 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 800edcbfc..01a01ac37 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -21,6 +21,7 @@ use crate::heed_codec::facet::{ use crate::heed_codec::{ BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec, }; +use crate::proximity::ProximityPrecision; use crate::readable_slices::ReadableSlices; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, @@ -72,6 +73,7 @@ pub mod main_key { pub const MAX_VALUES_PER_FACET: &str = "max-values-per-facet"; pub const SORT_FACET_VALUES_BY: &str = "sort-facet-values-by"; pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits"; + pub const PROXIMITY_PRECISION: &str = "proximity-precision"; } pub mod db_name { @@ -1466,6 +1468,28 @@ impl Index { self.main.remap_key_type::().delete(txn, main_key::PAGINATION_MAX_TOTAL_HITS) } + pub fn proximity_precision(&self, txn: &RoTxn) -> heed::Result> { + self.main + .remap_types::>() + .get(txn, main_key::PROXIMITY_PRECISION) + } + + pub(crate) fn 
put_proximity_precision( + &self, + txn: &mut RwTxn, + val: ProximityPrecision, + ) -> heed::Result<()> { + self.main.remap_types::>().put( + txn, + main_key::PROXIMITY_PRECISION, + &val, + ) + } + + pub(crate) fn delete_proximity_precision(&self, txn: &mut RwTxn) -> heed::Result { + self.main.remap_key_type::().delete(txn, main_key::PROXIMITY_PRECISION) + } + /* script language docids */ /// Retrieve all the documents ids that correspond with (Script, Language) key, `None` if it is any. pub fn script_language_documents_ids( diff --git a/milli/src/proximity.rs b/milli/src/proximity.rs index 8261015a3..2745527c1 100644 --- a/milli/src/proximity.rs +++ b/milli/src/proximity.rs @@ -1,5 +1,7 @@ use std::cmp; +use serde::{Deserialize, Serialize}; + use crate::{relative_from_absolute_position, Position}; pub const MAX_DISTANCE: u32 = 4; @@ -25,3 +27,11 @@ pub fn positions_proximity(lhs: Position, rhs: Position) -> u32 { pub fn path_proximity(path: &[Position]) -> u32 { path.windows(2).map(|w| positions_proximity(w[0], w[1])).sum::() } + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] +#[serde(rename_all = "camelCase")] +pub enum ProximityPrecision { + #[default] + WordScale, + AttributeScale, +} diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index b7a74fb62..051e366d0 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -10,6 +10,7 @@ use roaring::RoaringBitmap; use super::interner::Interned; use super::Word; use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec}; +use crate::proximity::ProximityPrecision; use crate::update::{merge_cbo_roaring_bitmaps, MergeFn}; use crate::{ CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec, @@ -263,18 +264,67 @@ impl<'ctx> SearchContext<'ctx> { word2: Interned, proximity: u8, ) -> Result> { - // TODO: if database is empty, search if the word are in the same attribute instead - 
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( - self.txn, - (proximity, word1, word2), - &( - proximity, - self.word_interner.get(word1).as_str(), - self.word_interner.get(word2).as_str(), - ), - &mut self.db_cache.word_pair_proximity_docids, - self.index.word_pair_proximity_docids.remap_data_type::(), - ) + match self.index.proximity_precision(self.txn)?.unwrap_or_default() { + ProximityPrecision::AttributeScale => { + // Force proximity to 0 because: + // in AttributeScale, there are only 2 possible distances: + // 1. words in same attribute: in that case the DB contains (0, word1, word2) + // 2. words in different attributes: no DB entry for these two words. + let proximity = 0; + let docids = if let Some(docids) = + self.db_cache.word_pair_proximity_docids.get(&(proximity, word1, word2)) + { + docids + .as_ref() + .map(|d| CboRoaringBitmapCodec::bytes_decode_owned(d)) + .transpose() + .map_err(heed::Error::Decoding)? + } else { + // Compute the distance at the attribute level and store it in the cache. + let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? { + fids + } else { + self.index.fields_ids_map(self.txn)?.ids().collect() + }; + let mut docids = RoaringBitmap::new(); + for fid in fids { + // for each field, intersect left word bitmap and right word bitmap, + // then merge the result in a global bitmap before storing it in the cache. 
+ let word1_docids = self.get_db_word_fid_docids(word1, fid)?; + let word2_docids = self.get_db_word_fid_docids(word2, fid)?; + if let (Some(word1_docids), Some(word2_docids)) = + (word1_docids, word2_docids) + { + docids |= word1_docids & word2_docids; + } + } + let encoded = CboRoaringBitmapCodec::bytes_encode(&docids) + .map(Cow::into_owned) + .map(Cow::Owned) + .map(Some) + .map_err(heed::Error::Decoding)?; + self.db_cache + .word_pair_proximity_docids + .insert((proximity, word1, word2), encoded); + Some(docids) + }; + + Ok(docids) + } + ProximityPrecision::WordScale => { + DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( + self.txn, + (proximity, word1, word2), + &( + proximity, + self.word_interner.get(word1).as_str(), + self.word_interner.get(word2).as_str(), + ), + &mut self.db_cache.word_pair_proximity_docids, + self.index.word_pair_proximity_docids.remap_data_type::(), + ) + } + } } pub fn get_db_word_pair_proximity_docids_len( @@ -283,56 +333,95 @@ impl<'ctx> SearchContext<'ctx> { word2: Interned, proximity: u8, ) -> Result> { - // TODO: if database is empty, search if the word are in the same attribute instead - DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>( - self.txn, - (proximity, word1, word2), - &( - proximity, - self.word_interner.get(word1).as_str(), - self.word_interner.get(word2).as_str(), - ), - &mut self.db_cache.word_pair_proximity_docids, - self.index.word_pair_proximity_docids.remap_data_type::(), - ) + match self.index.proximity_precision(self.txn)?.unwrap_or_default() { + ProximityPrecision::AttributeScale => Ok(self + .get_db_word_pair_proximity_docids(word1, word2, proximity)? 
+ .map(|d| d.len())), + ProximityPrecision::WordScale => { + DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>( + self.txn, + (proximity, word1, word2), + &( + proximity, + self.word_interner.get(word1).as_str(), + self.word_interner.get(word2).as_str(), + ), + &mut self.db_cache.word_pair_proximity_docids, + self.index.word_pair_proximity_docids.remap_data_type::(), + ) + } + } } pub fn get_db_word_prefix_pair_proximity_docids( &mut self, word1: Interned, prefix2: Interned, - proximity: u8, + mut proximity: u8, ) -> Result> { - // TODO: if database is empty, search if the word are in the same attribute instead - let docids = match self - .db_cache - .word_prefix_pair_proximity_docids - .entry((proximity, word1, prefix2)) - { - Entry::Occupied(docids) => docids.get().clone(), - Entry::Vacant(entry) => { - // compute docids using prefix iter and store the result in the cache. - let key = U8StrStrCodec::bytes_encode(&( - proximity, - self.word_interner.get(word1).as_str(), - self.word_interner.get(prefix2).as_str(), - )) - .unwrap() - .into_owned(); - let mut prefix_docids = RoaringBitmap::new(); - let remap_key_type = self - .index - .word_pair_proximity_docids - .remap_key_type::() - .prefix_iter(self.txn, &key)?; - for result in remap_key_type { - let (_, docids) = result?; + let proximity_precision = self.index.proximity_precision(self.txn)?.unwrap_or_default(); + if proximity_precision == ProximityPrecision::AttributeScale { + // Force proximity to 0 because: + // in AttributeScale, there are only 2 possible distances: + // 1. words in same attribute: in that case the DB contains (0, word1, word2) + // 2. words in different attributes: no DB entry for these two words. 
+ proximity = 0; + } - prefix_docids |= docids; + let docids = if let Some(docids) = + self.db_cache.word_prefix_pair_proximity_docids.get(&(proximity, word1, prefix2)) + { + docids.clone() + } else { + let prefix_docids = match proximity_precision { + ProximityPrecision::AttributeScale => { + // Compute the distance at the attribute level and store it in the cache. + let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? { + fids + } else { + self.index.fields_ids_map(self.txn)?.ids().collect() + }; + let mut prefix_docids = RoaringBitmap::new(); + // for each field, intersect left word bitmap and right word bitmap, + // then merge the result in a global bitmap before storing it in the cache. + for fid in fids { + let word1_docids = self.get_db_word_fid_docids(word1, fid)?; + let prefix2_docids = self.get_db_word_prefix_fid_docids(prefix2, fid)?; + if let (Some(word1_docids), Some(prefix2_docids)) = + (word1_docids, prefix2_docids) + { + prefix_docids |= word1_docids & prefix2_docids; + } + } + prefix_docids } - entry.insert(Some(prefix_docids.clone())); - Some(prefix_docids) - } + ProximityPrecision::WordScale => { + // compute docids using prefix iter and store the result in the cache. 
+ let key = U8StrStrCodec::bytes_encode(&( + proximity, + self.word_interner.get(word1).as_str(), + self.word_interner.get(prefix2).as_str(), + )) + .unwrap() + .into_owned(); + let mut prefix_docids = RoaringBitmap::new(); + let remap_key_type = self + .index + .word_pair_proximity_docids + .remap_key_type::() + .prefix_iter(self.txn, &key)?; + for result in remap_key_type { + let (_, docids) = result?; + + prefix_docids |= docids; + } + prefix_docids + } + }; + self.db_cache + .word_prefix_pair_proximity_docids + .insert((proximity, word1, prefix2), Some(prefix_docids.clone())); + Some(prefix_docids) }; Ok(docids) } diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 366e61c04..57f349894 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -32,6 +32,7 @@ use super::helpers::{ MergeFn, MergeableReader, }; use super::{helpers, TypedChunk}; +use crate::proximity::ProximityPrecision; use crate::{FieldId, Result}; /// Extract data for each databases from obkv documents in parallel. @@ -52,7 +53,7 @@ pub(crate) fn data_from_obkv_documents( dictionary: Option<&[&str]>, max_positions_per_attributes: Option, exact_attributes: HashSet, - // TODO: add a proximity database deactivation parameter. 
+ proximity_precision: ProximityPrecision, ) -> Result<()> { puffin::profile_function!(); @@ -151,16 +152,17 @@ pub(crate) fn data_from_obkv_documents( }); } - // TODO: Skip this part if deactivated - spawn_extraction_task::<_, _, Vec>>>( - docid_word_positions_chunks.clone(), - indexer, - lmdb_writer_sx.clone(), - extract_word_pair_proximity_docids, - merge_deladd_cbo_roaring_bitmaps, - TypedChunk::WordPairProximityDocids, - "word-pair-proximity-docids", - ); + if proximity_precision == ProximityPrecision::WordScale { + spawn_extraction_task::<_, _, Vec>>>( + docid_word_positions_chunks.clone(), + indexer, + lmdb_writer_sx.clone(), + extract_word_pair_proximity_docids, + merge_deladd_cbo_roaring_bitmaps, + TypedChunk::WordPairProximityDocids, + "word-pair-proximity-docids", + ); + } spawn_extraction_task::<_, _, Vec>>>( docid_word_positions_chunks.clone(), diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index be2fbb25e..f825cad1c 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -352,6 +352,7 @@ where let dictionary: Option> = dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?; + let proximity_precision = self.index.proximity_precision(self.wtxn)?.unwrap_or_default(); let pool_params = GrenadParameters { chunk_compression_type: self.indexer_config.chunk_compression_type, @@ -392,6 +393,7 @@ where dictionary.as_deref(), max_positions_per_attributes, exact_attributes, + proximity_precision, ) }); diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 0a069c6df..712e595e9 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -12,6 +12,7 @@ use super::IndexerConfig; use crate::criterion::Criterion; use crate::error::UserError; use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; +use 
crate::proximity::ProximityPrecision; use crate::update::index_documents::IndexDocumentsMethod; use crate::update::{IndexDocuments, UpdateIndexingStep}; use crate::{FieldsIdsMap, Index, OrderBy, Result}; @@ -127,7 +128,7 @@ pub struct Settings<'a, 't, 'i> { max_values_per_facet: Setting, sort_facet_values_by: Setting>, pagination_max_total_hits: Setting, - // TODO: add a proximity database deactivation attribute. + proximity_precision: Setting, } impl<'a, 't, 'i> Settings<'a, 't, 'i> { @@ -159,6 +160,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { max_values_per_facet: Setting::NotSet, sort_facet_values_by: Setting::NotSet, pagination_max_total_hits: Setting::NotSet, + proximity_precision: Setting::NotSet, indexer_config, } } @@ -333,6 +335,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.pagination_max_total_hits = Setting::Reset; } + pub fn set_proximity_precision(&mut self, value: ProximityPrecision) { + self.proximity_precision = Setting::Set(value); + } + + pub fn reset_proximity_precision(&mut self) { + self.proximity_precision = Setting::Reset; + } + fn reindex( &mut self, progress_callback: &FP, @@ -862,6 +872,24 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { Ok(()) } + fn update_proximity_precision(&mut self) -> Result { + let changed = match self.proximity_precision { + Setting::Set(new) => { + let old = self.index.proximity_precision(self.wtxn)?; + if old == Some(new) { + false + } else { + self.index.put_proximity_precision(self.wtxn, new)?; + true + } + } + Setting::Reset => self.index.delete_proximity_precision(self.wtxn)?, + Setting::NotSet => false, + }; + + Ok(changed) + } + pub fn execute(mut self, progress_callback: FP, should_abort: FA) -> Result<()> where FP: Fn(UpdateIndexingStep) + Sync, @@ -898,6 +926,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { let synonyms_updated = self.update_synonyms()?; let searchable_updated = self.update_searchable()?; let exact_attributes_updated = self.update_exact_attributes()?; + let proximity_precision = 
self.update_proximity_precision()?; if stop_words_updated || non_separator_tokens_updated @@ -907,7 +936,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { || synonyms_updated || searchable_updated || exact_attributes_updated - // TODO: reindex if proximity database is activated + || proximity_precision { self.reindex(&progress_callback, &should_abort, old_fields_ids_map)?; } @@ -1733,6 +1762,7 @@ mod tests { max_values_per_facet, sort_facet_values_by, pagination_max_total_hits, + proximity_precision, } = settings; assert!(matches!(searchable_fields, Setting::NotSet)); assert!(matches!(displayed_fields, Setting::NotSet)); @@ -1754,6 +1784,7 @@ mod tests { assert!(matches!(max_values_per_facet, Setting::NotSet)); assert!(matches!(sort_facet_values_by, Setting::NotSet)); assert!(matches!(pagination_max_total_hits, Setting::NotSet)); + assert!(matches!(proximity_precision, Setting::NotSet)); }) .unwrap(); } From 8cc3c541179f0a9fc5ecd74c9454bf03b4aaff5d Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 6 Dec 2023 14:46:12 +0100 Subject: [PATCH 4/7] Add proximityPrecision setting in settings route --- meilisearch-types/src/error.rs | 1 + meilisearch-types/src/settings.rs | 47 ++++++++++++++++++++++ meilisearch/src/routes/indexes/settings.rs | 28 +++++++++++++ meilisearch/tests/settings/get_settings.rs | 2 +- 4 files changed, 77 insertions(+), 1 deletion(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 653cb108b..b1dc6b777 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -252,6 +252,7 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; InvalidSearchSort , InvalidRequest , BAD_REQUEST ; InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ; +InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ; InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ; InvalidSettingsFilterableAttributes , 
InvalidRequest , BAD_REQUEST ; InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs index a57dc0e9a..487354b8e 100644 --- a/meilisearch-types/src/settings.rs +++ b/meilisearch-types/src/settings.rs @@ -8,6 +8,7 @@ use std::str::FromStr; use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef}; use fst::IntoStreamer; +use milli::proximity::ProximityPrecision; use milli::update::Setting; use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET}; use serde::{Deserialize, Serialize, Serializer}; @@ -186,6 +187,9 @@ pub struct Settings { #[deserr(default, error = DeserrJsonError)] pub distinct_attribute: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default, error = DeserrJsonError)] + pub proximity_precision: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] pub typo_tolerance: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] @@ -214,6 +218,7 @@ impl Settings { separator_tokens: Setting::Reset, dictionary: Setting::Reset, distinct_attribute: Setting::Reset, + proximity_precision: Setting::Reset, typo_tolerance: Setting::Reset, faceting: Setting::Reset, pagination: Setting::Reset, @@ -234,6 +239,7 @@ impl Settings { dictionary, synonyms, distinct_attribute, + proximity_precision, typo_tolerance, faceting, pagination, @@ -252,6 +258,7 @@ impl Settings { dictionary, synonyms, distinct_attribute, + proximity_precision, typo_tolerance, faceting, pagination, @@ -296,6 +303,7 @@ impl Settings { separator_tokens: self.separator_tokens, dictionary: self.dictionary, distinct_attribute: self.distinct_attribute, + proximity_precision: self.proximity_precision, typo_tolerance: self.typo_tolerance, faceting: self.faceting, pagination: self.pagination, @@ -390,6 +398,12 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } 
+ match settings.proximity_precision { + Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()), + Setting::Reset => builder.reset_proximity_precision(), + Setting::NotSet => (), + } + match settings.typo_tolerance { Setting::Set(ref value) => { match value.enabled { @@ -509,6 +523,8 @@ pub fn settings( let distinct_field = index.distinct_field(rtxn)?.map(String::from); + let proximity_precision = index.proximity_precision(rtxn)?.map(ProximityPrecisionView::from); + let synonyms = index.user_defined_synonyms(rtxn)?; let min_typo_word_len = MinWordSizeTyposSetting { @@ -575,6 +591,10 @@ pub fn settings( Some(field) => Setting::Set(field), None => Setting::Reset, }, + proximity_precision: match proximity_precision { + Some(precision) => Setting::Set(precision), + None => Setting::Reset, + }, synonyms: Setting::Set(synonyms), typo_tolerance: Setting::Set(typo_tolerance), faceting: Setting::Set(faceting), @@ -679,6 +699,31 @@ impl From for Criterion { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub enum ProximityPrecisionView { + WordScale, + AttributeScale, +} + +impl From for ProximityPrecisionView { + fn from(value: ProximityPrecision) -> Self { + match value { + ProximityPrecision::WordScale => ProximityPrecisionView::WordScale, + ProximityPrecision::AttributeScale => ProximityPrecisionView::AttributeScale, + } + } +} +impl From for ProximityPrecision { + fn from(value: ProximityPrecisionView) -> Self { + match value { + ProximityPrecisionView::WordScale => ProximityPrecision::WordScale, + ProximityPrecisionView::AttributeScale => ProximityPrecision::AttributeScale, + } + } +} + #[cfg(test)] pub(crate) mod test { use super::*; @@ -698,6 +743,7 @@ pub(crate) mod test { dictionary: Setting::NotSet, synonyms: Setting::NotSet, distinct_attribute: Setting::NotSet, + 
proximity_precision: Setting::NotSet, typo_tolerance: Setting::NotSet, faceting: Setting::NotSet, pagination: Setting::NotSet, @@ -722,6 +768,7 @@ pub(crate) mod test { dictionary: Setting::NotSet, synonyms: Setting::NotSet, distinct_attribute: Setting::NotSet, + proximity_precision: Setting::NotSet, typo_tolerance: Setting::NotSet, faceting: Setting::NotSet, pagination: Setting::NotSet, diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index f5d98761d..c22db24f0 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -435,6 +435,30 @@ make_setting_route!( } ); +make_setting_route!( + "/proximity-precision", + put, + meilisearch_types::settings::ProximityPrecisionView, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision, + >, + proximity_precision, + "proximityPrecision", + analytics, + |precision: &Option, req: &HttpRequest| { + use serde_json::json; + analytics.publish( + "ProximityPrecision Updated".to_string(), + json!({ + "proximity_precision": { + "set": precision.is_some(), + } + }), + Some(req), + ); + } +); + make_setting_route!( "/ranking-rules", put, @@ -541,6 +565,7 @@ generate_configure!( displayed_attributes, searchable_attributes, distinct_attribute, + proximity_precision, stop_words, separator_tokens, non_separator_tokens, @@ -594,6 +619,9 @@ pub async fn update_all( "distinct_attribute": { "set": new_settings.distinct_attribute.as_ref().set().is_some() }, + "proximity_precision": { + "set": new_settings.proximity_precision.as_ref().set().is_some() + }, "typo_tolerance": { "enabled": new_settings.typo_tolerance .as_ref() diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index b5c4644a5..0ea556b94 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -54,7 +54,7 @@ async fn 
get_settings() { let (response, code) = index.settings().await; assert_eq!(code, 200); let settings = response.as_object().unwrap(); - assert_eq!(settings.keys().len(), 14); + assert_eq!(settings.keys().len(), 15); assert_eq!(settings["displayedAttributes"], json!(["*"])); assert_eq!(settings["searchableAttributes"], json!(["*"])); assert_eq!(settings["filterableAttributes"], json!([])); From 1f4fc9c2292d2e81271fd47b039243e5f5186411 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 6 Dec 2023 14:47:34 +0100 Subject: [PATCH 5/7] Make the feature experimental --- index-scheduler/src/batch.rs | 3 +++ index-scheduler/src/features.rs | 13 +++++++++++++ meilisearch-types/src/features.rs | 1 + meilisearch/src/routes/features.rs | 8 ++++++++ meilisearch/tests/features/mod.rs | 20 +++++++++++++------- 5 files changed, 38 insertions(+), 7 deletions(-) diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 5a886acf8..9089acb69 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -1343,6 +1343,9 @@ impl IndexScheduler { for (task, (_, settings)) in tasks.iter_mut().zip(settings) { let checked_settings = settings.clone().check(); + if checked_settings.proximity_precision.set().is_some() { + self.features.features().check_proximity_precision()?; + } task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) }); apply_settings_to_builder(&checked_settings, &mut builder); diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index 1db27bcd5..ae2823c30 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -81,6 +81,19 @@ impl RoFeatures { .into()) } } + + pub fn check_proximity_precision(&self) -> Result<()> { + if self.runtime.proximity_precision { + Ok(()) + } else { + Err(FeatureNotEnabledError { + disabled_action: "Using `proximityPrecision` index setting", + feature: "proximity precision", + issue_link: 
"https://github.com/orgs/meilisearch/discussions/710", + } + .into()) + } + } } impl FeatureData { diff --git a/meilisearch-types/src/features.rs b/meilisearch-types/src/features.rs index 33afe2d24..f0cbce340 100644 --- a/meilisearch-types/src/features.rs +++ b/meilisearch-types/src/features.rs @@ -7,6 +7,7 @@ pub struct RuntimeTogglableFeatures { pub vector_store: bool, pub metrics: bool, pub export_puffin_reports: bool, + pub proximity_precision: bool, } #[derive(Default, Debug, Clone, Copy)] diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index e7fd8de22..29e922eba 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -48,6 +48,8 @@ pub struct RuntimeTogglableFeatures { pub metrics: Option, #[deserr(default)] pub export_puffin_reports: Option, + #[deserr(default)] + pub proximity_precision: Option, } async fn patch_features( @@ -70,6 +72,10 @@ async fn patch_features( .0 .export_puffin_reports .unwrap_or(old_features.export_puffin_reports), + proximity_precision: new_features + .0 + .proximity_precision + .unwrap_or(old_features.proximity_precision), }; // explicitly destructure for analytics rather than using the `Serialize` implementation, because @@ -80,6 +86,7 @@ async fn patch_features( vector_store, metrics, export_puffin_reports, + proximity_precision, } = new_features; analytics.publish( @@ -89,6 +96,7 @@ async fn patch_features( "vector_store": vector_store, "metrics": metrics, "export_puffin_reports": export_puffin_reports, + "proximity_precision": proximity_precision, }), Some(&req), ); diff --git a/meilisearch/tests/features/mod.rs b/meilisearch/tests/features/mod.rs index abb006ac8..812a5c6bb 100644 --- a/meilisearch/tests/features/mod.rs +++ b/meilisearch/tests/features/mod.rs @@ -21,7 +21,8 @@ async fn experimental_features() { "scoreDetails": false, "vectorStore": false, "metrics": false, - "exportPuffinReports": false + "exportPuffinReports": false, + 
"proximityPrecision": false } "###); @@ -33,7 +34,8 @@ async fn experimental_features() { "scoreDetails": false, "vectorStore": true, "metrics": false, - "exportPuffinReports": false + "exportPuffinReports": false, + "proximityPrecision": false } "###); @@ -45,7 +47,8 @@ async fn experimental_features() { "scoreDetails": false, "vectorStore": true, "metrics": false, - "exportPuffinReports": false + "exportPuffinReports": false, + "proximityPrecision": false } "###); @@ -58,7 +61,8 @@ async fn experimental_features() { "scoreDetails": false, "vectorStore": true, "metrics": false, - "exportPuffinReports": false + "exportPuffinReports": false, + "proximityPrecision": false } "###); @@ -71,7 +75,8 @@ async fn experimental_features() { "scoreDetails": false, "vectorStore": true, "metrics": false, - "exportPuffinReports": false + "exportPuffinReports": false, + "proximityPrecision": false } "###); } @@ -91,7 +96,8 @@ async fn experimental_feature_metrics() { "scoreDetails": false, "vectorStore": false, "metrics": true, - "exportPuffinReports": false + "exportPuffinReports": false, + "proximityPrecision": false } "###); @@ -146,7 +152,7 @@ async fn errors() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`", + "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`, `proximityPrecision`", "code": "bad_request", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#bad_request" From 03ffabe889c84762e2ff9194093fb85addce687c Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 6 Dec 2023 14:52:51 +0100 Subject: [PATCH 6/7] Add a new dump test --- ..._use_deactivated_experimental_setting.dump | Bin 0 -> 1522 bytes meilisearch/tests/dumps/data.rs | 6 + meilisearch/tests/dumps/mod.rs | 105 ++++++++++++++++++ 
3 files changed, 111 insertions(+) create mode 100644 meilisearch/tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump diff --git a/meilisearch/tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump b/meilisearch/tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump new file mode 100644 index 0000000000000000000000000000000000000000..4f50733fd179e3b36e5a36fb822783deaaa9209f GIT binary patch literal 1522 zcmVY!mkw^Si{|%th|HVvx z5$9S4PtZTX4sP||05)R5g(;edd}`nSQZ^x*P4it|j&Sho0s zFQ62vx^|Rtrg(g*pn$+;-NU4Zyc_HcC>{{hM=r+H`2>>1JpR5U_4*;`*d?Fr`%a6W z1Y7o>bEQQ3Id9VbG1~q9Q{SPj{WpN69gBRPu!S~Tm5FhoU>g|xWxulj?%FwLSu|zi zgzu@Gh(z&hr&2eQnc7#LvGf*J*ZU$VyZI`!SnoM%F_&iRk#}My)WSSk$@!nA-ZV{R zI^=VfF(tDbdCSxKQTqyG5h)>4*v2ZM1vFpGpmuq$YjarRk$M=NJxGfrS$qC<#v_qK z53rB1T51@vgja*TkqNwc8bQ;g%~SoZu!*Xy;9YwrQa$dW_{QJBus~Wr*On>Q_x8Yr zF22gDJS@kfJ9w9_z3xNV<`jbTghgDNsP1#d?mn`l;Q2Mr-sp1hjLF9ITk{SOxM2;6{v0e?ED$07d@ zLEZNG-}U`=|8D?{#POJM+GCVQJsOdq7dVvkJmgJy40>?9XY(#CejNO6&cWjc%7-Pr z2{`&i#!u`09drJ}JN_S~UTgmiU}@)(%=i_Y`$YoZ{s~KRaALmj2z;s`{PKq~;07`g ztz=;7fS++)OeP|I&-MMPZl5b?==fYVCpDi4;f3jo$*vGiF{{{f(zhXB3$g}FW z^U;GNPCQ-};J({Ox(vjwm_96RQ#)jULCB$@i(6{vx#5;CP8#dkheP}ic6Q=_n0T%I zHvzDJo=2IOmsEiLA?KD*mXcP<{aBMs&dRV%CR4>z%?PJ#ruTs>K4C?otYIm6dHvSX zv~jnhlqaDR1c4XzCQ(Rx6gj9j=GgCXH>PojNDz5?V_}A?F?>+@NX`CKwB!TV1ipp< z$8^M0P1aPbv?teP!o3Bnckq93>kw&zQ-aQv6mfxN?`{!x-KMr=={Er4;^nXzr_6QjL zbL0R$CI83w+Wem;Q0o8Rf*1U3Zr}m`kD}f4A8}j%uMt%7e^hx2A2yQ#U~W_a&V!v7 zQN(%7<1zqd;$C&Ec>DgCFd9UlL_Ugn!qti!0`DbB+`ZGB+quW&y z4e)GJsrv+`G=b3URh?+5sg?SE6v-l0WuDxzTBtj^jANmN-eoI&!0PAX3pnf#PcVI# zXM)Yr!#dFUJCuNbe^3XdWEg~pbzqW$JmdsxI&ju?P@Yj9pMPMcUO!CH($@~ literal 0 HcmV?d00001 diff --git a/meilisearch/tests/dumps/data.rs b/meilisearch/tests/dumps/data.rs index 5df09bfd1..d353aaf1d 100644 --- a/meilisearch/tests/dumps/data.rs +++ b/meilisearch/tests/dumps/data.rs @@ -20,6 +20,8 @@ pub enum GetDump { RubyGemsWithSettingsV4, TestV5, + + TestV6WithExperimental, } impl GetDump { @@ -68,6 +70,10 @@ impl GetDump { 
GetDump::TestV5 => { exist_relative_path!("tests/assets/v5_v0.28.0_test_dump.dump").into() } + GetDump::TestV6WithExperimental => exist_relative_path!( + "tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump" + ) + .into(), } } } diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs index e60893d4e..c69623e9e 100644 --- a/meilisearch/tests/dumps/mod.rs +++ b/meilisearch/tests/dumps/mod.rs @@ -1810,3 +1810,108 @@ async fn import_dump_v5() { json_string!(tasks, { ".results[].details.dumpUid" => "[uid]", ".results[].duration" => "[duration]" , ".results[].startedAt" => "[date]" , ".results[].finishedAt" => "[date]" }) ); } + +#[actix_rt::test] +async fn import_dump_v6_containing_experimental_features() { + let temp = tempfile::tempdir().unwrap(); + + let options = Opt { + import_dump: Some(GetDump::TestV6WithExperimental.path()), + ..default_settings(temp.path()) + }; + let mut server = Server::new_auth_with_options(options, temp).await; + server.use_api_key("MASTER_KEY"); + + let (indexes, code) = server.list_indexes(None, None).await; + assert_eq!(code, 200, "{indexes}"); + + assert_eq!(indexes["results"].as_array().unwrap().len(), 1); + assert_eq!(indexes["results"][0]["uid"], json!("movies")); + assert_eq!(indexes["results"][0]["primaryKey"], json!("id")); + + let (response, code) = server.get_features().await; + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "scoreDetails": false, + "vectorStore": false, + "metrics": false, + "exportPuffinReports": false, + "proximityPrecision": false + } + "###); + + let index = server.index("movies"); + + let (response, code) = index.settings().await; + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "sortableAttributes": [], + "rankingRules": [ + "words", + 
"typo", + "proximity" + ], + "stopWords": [], + "nonSeparatorTokens": [], + "separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "proximityPrecision": "attributeScale", + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + } + } + "###); + + // the expected order is [1, 3, 2] instead of [3, 1, 2] + // because the attribute scale doesn't make the difference between 1 and 3. + index + .search(json!({"q": "the soup of day"}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "a": "Soup of the day", + "b": "many the fish" + }, + { + "id": 3, + "a": "the Soup of day", + "b": "many the fish" + }, + { + "id": 2, + "a": "Soup of day", + "b": "many the lazy fish" + } + ] + "###); + }) + .await; +} From c9860c7913ddab838321ce10457082b7b682e293 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 6 Dec 2023 14:49:29 +0100 Subject: [PATCH 7/7] Small test fixes --- dump/src/lib.rs | 1 + dump/src/reader/compat/v5_to_v6.rs | 1 + meilisearch/tests/dumps/mod.rs | 12 ++++++++++++ 3 files changed, 14 insertions(+) diff --git a/dump/src/lib.rs b/dump/src/lib.rs index fa3cfb49a..15b281c41 100644 --- a/dump/src/lib.rs +++ b/dump/src/lib.rs @@ -267,6 +267,7 @@ pub(crate) mod test { dictionary: Setting::NotSet, synonyms: Setting::NotSet, distinct_attribute: Setting::NotSet, + proximity_precision: Setting::NotSet, typo_tolerance: Setting::NotSet, faceting: Setting::Set(FacetingSettings { max_values_per_facet: Setting::Set(111), diff --git a/dump/src/reader/compat/v5_to_v6.rs b/dump/src/reader/compat/v5_to_v6.rs index 9e938d756..8a0d6e5e1 100644 --- a/dump/src/reader/compat/v5_to_v6.rs +++ b/dump/src/reader/compat/v5_to_v6.rs @@ -345,6 +345,7 
@@ impl From> for v6::Settings { dictionary: v6::Setting::NotSet, synonyms: settings.synonyms.into(), distinct_attribute: settings.distinct_attribute.into(), + proximity_precision: v6::Setting::NotSet, typo_tolerance: match settings.typo_tolerance { v5::Setting::Set(typo) => v6::Setting::Set(v6::TypoTolerance { enabled: typo.enabled.into(), diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs index c69623e9e..9e949436a 100644 --- a/meilisearch/tests/dumps/mod.rs +++ b/meilisearch/tests/dumps/mod.rs @@ -59,6 +59,7 @@ async fn import_dump_v1_movie_raw() { "dictionary": [], "synonyms": {}, "distinctAttribute": null, + "proximityPrecision": null, "typoTolerance": { "enabled": true, "minWordSizeForTypos": { @@ -219,6 +220,7 @@ async fn import_dump_v1_movie_with_settings() { "dictionary": [], "synonyms": {}, "distinctAttribute": null, + "proximityPrecision": null, "typoTolerance": { "enabled": true, "minWordSizeForTypos": { @@ -365,6 +367,7 @@ async fn import_dump_v1_rubygems_with_settings() { "dictionary": [], "synonyms": {}, "distinctAttribute": null, + "proximityPrecision": null, "typoTolerance": { "enabled": true, "minWordSizeForTypos": { @@ -497,6 +500,7 @@ async fn import_dump_v2_movie_raw() { "dictionary": [], "synonyms": {}, "distinctAttribute": null, + "proximityPrecision": null, "typoTolerance": { "enabled": true, "minWordSizeForTypos": { @@ -641,6 +645,7 @@ async fn import_dump_v2_movie_with_settings() { "dictionary": [], "synonyms": {}, "distinctAttribute": null, + "proximityPrecision": null, "typoTolerance": { "enabled": true, "minWordSizeForTypos": { @@ -784,6 +789,7 @@ async fn import_dump_v2_rubygems_with_settings() { "dictionary": [], "synonyms": {}, "distinctAttribute": null, + "proximityPrecision": null, "typoTolerance": { "enabled": true, "minWordSizeForTypos": { @@ -916,6 +922,7 @@ async fn import_dump_v3_movie_raw() { "dictionary": [], "synonyms": {}, "distinctAttribute": null, + "proximityPrecision": null, "typoTolerance": { 
"enabled": true, "minWordSizeForTypos": { @@ -1060,6 +1067,7 @@ async fn import_dump_v3_movie_with_settings() { "dictionary": [], "synonyms": {}, "distinctAttribute": null, + "proximityPrecision": null, "typoTolerance": { "enabled": true, "minWordSizeForTypos": { @@ -1203,6 +1211,7 @@ async fn import_dump_v3_rubygems_with_settings() { "dictionary": [], "synonyms": {}, "distinctAttribute": null, + "proximityPrecision": null, "typoTolerance": { "enabled": true, "minWordSizeForTypos": { @@ -1335,6 +1344,7 @@ async fn import_dump_v4_movie_raw() { "dictionary": [], "synonyms": {}, "distinctAttribute": null, + "proximityPrecision": null, "typoTolerance": { "enabled": true, "minWordSizeForTypos": { @@ -1479,6 +1489,7 @@ async fn import_dump_v4_movie_with_settings() { "dictionary": [], "synonyms": {}, "distinctAttribute": null, + "proximityPrecision": null, "typoTolerance": { "enabled": true, "minWordSizeForTypos": { @@ -1622,6 +1633,7 @@ async fn import_dump_v4_rubygems_with_settings() { "dictionary": [], "synonyms": {}, "distinctAttribute": null, + "proximityPrecision": null, "typoTolerance": { "enabled": true, "minWordSizeForTypos": {