From dc077901335fe4ca7e32b53ebdb7c0b052f8c810 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 27 Nov 2023 11:03:16 +0100 Subject: [PATCH 1/3] Add test reproducing #4232 --- .../tests/search/restrict_searchable.rs | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/meilisearch/tests/search/restrict_searchable.rs b/meilisearch/tests/search/restrict_searchable.rs index cfdff95ee..7bbdca38f 100644 --- a/meilisearch/tests/search/restrict_searchable.rs +++ b/meilisearch/tests/search/restrict_searchable.rs @@ -335,3 +335,35 @@ async fn exactness_ranking_rule_order() { }) .await; } + +#[actix_rt::test] +async fn search_on_exact_field() { + let server = Server::new().await; + let index = index_with_documents( + &server, + &json!([ + { + "title": "Captain Marvel", + "exact": "Captain Marivel", + "id": "1", + }, + { + "title": "Captain Marivel", + "exact": "Captain the Marvel", + "id": "2", + }]), + ) + .await; + + let (response, code) = + index.update_settings_typo_tolerance(json!({ "disableOnAttributes": ["exact"] })).await; + assert_eq!(202, code, "{:?}", response); + index.wait_task(1).await; + // Searching on an exact attribute should only return the document matching without typo. + index + .search(json!({"q": "Marvel", "attributesToSearchOn": ["exact"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(response["hits"].as_array().unwrap().len(), @"1"); + }) + .await; +} From d6c2ee15a9c702a54ccc3c9e825c562b0c7b56f9 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 28 Nov 2023 14:55:29 +0100 Subject: [PATCH 2/3] Filter on attributes before computing the docids when attribute restriction is on --- milli/src/search/new/db_cache.rs | 70 ++++++++++++++++++++++++-------- milli/src/search/new/mod.rs | 19 ++++++--- 2 files changed, 66 insertions(+), 23 deletions(-) diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index e0a2ba3cf..d7ef031bb 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -154,7 +154,7 @@ impl<'ctx> SearchContext<'ctx> { /// Retrieve or insert the given value in the `word_docids` database. fn get_db_word_docids(&mut self, word: Interned) -> Result> { - match &self.restricted_fids { + match &self.restricted_tolerant_fids { Some(restricted_fids) => { let interned = self.word_interner.get(word).as_str(); let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); @@ -182,13 +182,28 @@ impl<'ctx> SearchContext<'ctx> { &mut self, word: Interned, ) -> Result> { - DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( - self.txn, - word, - self.word_interner.get(word).as_str(), - &mut self.db_cache.exact_word_docids, - self.index.exact_word_docids.remap_data_type::(), - ) + match &self.restricted_exact_fids { + Some(restricted_fids) => { + let interned = self.word_interner.get(word).as_str(); + let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); + + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + word, + &keys[..], + &mut self.db_cache.exact_word_docids, + self.index.word_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) + } + None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( + self.txn, + word, + self.word_interner.get(word).as_str(), + &mut self.db_cache.exact_word_docids, + self.index.exact_word_docids.remap_data_type::(), + ), + } } pub fn word_prefix_docids(&mut self, prefix: Word) -> Result> { @@ -216,7 +231,7 @@ impl<'ctx> SearchContext<'ctx> { &mut self, prefix: Interned, ) -> Result> { - match &self.restricted_fids { + match &self.restricted_tolerant_fids { Some(restricted_fids) => { let interned = self.word_interner.get(prefix).as_str(); let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); @@ -244,13 +259,28 @@ impl<'ctx> SearchContext<'ctx> { &mut self, prefix: Interned, ) -> Result> { - DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( - self.txn, - prefix, - self.word_interner.get(prefix).as_str(), - &mut self.db_cache.exact_word_prefix_docids, - self.index.exact_word_prefix_docids.remap_data_type::(), - ) + match &self.restricted_exact_fids { + Some(restricted_fids) => { + let interned = self.word_interner.get(prefix).as_str(); + let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); + + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + prefix, + &keys[..], + &mut self.db_cache.exact_word_prefix_docids, + self.index.word_prefix_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) + } + None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( + self.txn, + prefix, + self.word_interner.get(prefix).as_str(), + &mut self.db_cache.exact_word_prefix_docids, + self.index.exact_word_prefix_docids.remap_data_type::(), + ), + } } pub fn get_db_word_pair_proximity_docids( @@ -334,7 +364,9 @@ impl<'ctx> SearchContext<'ctx> { fid: u16, ) -> Result> { // if the requested fid isn't in the restricted list, return None. - if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { + if self.restricted_tolerant_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) + && self.restricted_exact_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) + { return Ok(None); } @@ -353,7 +385,9 @@ impl<'ctx> SearchContext<'ctx> { fid: u16, ) -> Result> { // if the requested fid isn't in the restricted list, return None. - if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { + if self.restricted_tolerant_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) + && self.restricted_exact_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) + { return Ok(None); } diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 6ceb78223..56c55d031 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -63,7 +63,8 @@ pub struct SearchContext<'ctx> { pub phrase_interner: DedupInterner, pub term_interner: Interner, pub phrase_docids: PhraseDocIdsCache, - pub restricted_fids: Option>, + pub restricted_tolerant_fids: Option>, + pub restricted_exact_fids: Option>, } impl<'ctx> SearchContext<'ctx> { @@ -76,15 +77,18 @@ impl<'ctx> SearchContext<'ctx> { phrase_interner: <_>::default(), term_interner: <_>::default(), phrase_docids: <_>::default(), - restricted_fids: None, + restricted_tolerant_fids: None, + restricted_exact_fids: None, } } pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> { let fids_map = self.index.fields_ids_map(self.txn)?; let searchable_names = self.index.searchable_fields(self.txn)?; + let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?; - let mut restricted_fids = Vec::new(); + let mut restricted_exact_fids = Vec::new(); + let mut restricted_tolerant_fids = Vec::new(); let mut contains_wildcard = false; for field_name in searchable_attributes { if field_name == "*" { @@ -123,10 +127,15 @@ impl<'ctx> SearchContext<'ctx> { } }; - restricted_fids.push(fid); + if exact_attributes_ids.contains(&fid) { + restricted_exact_fids.push(fid); + } else { + restricted_tolerant_fids.push(fid); + }; } - self.restricted_fids = (!contains_wildcard).then_some(restricted_fids); + self.restricted_exact_fids = (!contains_wildcard).then_some(restricted_exact_fids); + self.restricted_tolerant_fids = (!contains_wildcard).then_some(restricted_tolerant_fids); Ok(()) } From 3b3fa38f27d0f18b7e34a7030ec4c704c8fac49a Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 28 Nov 2023 18:37:57 +0100 Subject: [PATCH 3/3] Put the restrict list in a sub-struct --- milli/src/search/new/db_cache.rs | 28 ++++++++++++++-------------- milli/src/search/new/mod.rs | 31 ++++++++++++++++++++----------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index d7ef031bb..ce846009a 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -154,10 +154,11 @@ impl<'ctx> SearchContext<'ctx> { /// Retrieve or insert the given value in the `word_docids` database. fn get_db_word_docids(&mut self, word: Interned) -> Result> { - match &self.restricted_tolerant_fids { + match &self.restricted_fids { Some(restricted_fids) => { let interned = self.word_interner.get(word).as_str(); - let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); + let keys: Vec<_> = + restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect(); DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( self.txn, @@ -182,10 +183,11 @@ impl<'ctx> SearchContext<'ctx> { &mut self, word: Interned, ) -> Result> { - match &self.restricted_exact_fids { + match &self.restricted_fids { Some(restricted_fids) => { let interned = self.word_interner.get(word).as_str(); - let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); + let keys: Vec<_> = + restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect(); DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( self.txn, @@ -231,10 +233,11 @@ impl<'ctx> SearchContext<'ctx> { &mut self, prefix: Interned, ) -> Result> { - match &self.restricted_tolerant_fids { + match &self.restricted_fids { Some(restricted_fids) => { let interned = self.word_interner.get(prefix).as_str(); - let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); + let keys: Vec<_> = + restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect(); DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( self.txn, @@ -259,10 +262,11 @@ impl<'ctx> SearchContext<'ctx> { &mut self, prefix: Interned, ) -> Result> { - match &self.restricted_exact_fids { + match &self.restricted_fids { Some(restricted_fids) => { let interned = self.word_interner.get(prefix).as_str(); - let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); + let keys: Vec<_> = + restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect(); DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( self.txn, @@ -364,9 +368,7 @@ impl<'ctx> SearchContext<'ctx> { fid: u16, ) -> Result> { // if the requested fid isn't in the restricted list, return None. - if self.restricted_tolerant_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) - && self.restricted_exact_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) - { + if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { return Ok(None); } @@ -385,9 +387,7 @@ impl<'ctx> SearchContext<'ctx> { fid: u16, ) -> Result> { // if the requested fid isn't in the restricted list, return None. - if self.restricted_tolerant_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) - && self.restricted_exact_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) - { + if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { return Ok(None); } diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 56c55d031..ba29dbd1f 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -51,7 +51,8 @@ use crate::error::FieldIdMapMissingEntry; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::search::new::distinct::apply_distinct_rule; use crate::{ - AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, BEU32, + AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, + BEU32, }; /// A structure used throughout the execution of a search query. @@ -63,8 +64,7 @@ pub struct SearchContext<'ctx> { pub phrase_interner: DedupInterner, pub term_interner: Interner, pub phrase_docids: PhraseDocIdsCache, - pub restricted_tolerant_fids: Option>, - pub restricted_exact_fids: Option>, + pub restricted_fids: Option, } impl<'ctx> SearchContext<'ctx> { @@ -77,8 +77,7 @@ impl<'ctx> SearchContext<'ctx> { phrase_interner: <_>::default(), term_interner: <_>::default(), phrase_docids: <_>::default(), - restricted_tolerant_fids: None, - restricted_exact_fids: None, + restricted_fids: None, } } @@ -87,8 +86,7 @@ impl<'ctx> SearchContext<'ctx> { let searchable_names = self.index.searchable_fields(self.txn)?; let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?; - let mut restricted_exact_fids = Vec::new(); - let mut restricted_tolerant_fids = Vec::new(); + let mut restricted_fids = RestrictedFids::default(); let mut contains_wildcard = false; for field_name in searchable_attributes { if field_name == "*" { @@ -128,14 +126,13 @@ impl<'ctx> SearchContext<'ctx> { }; if exact_attributes_ids.contains(&fid) { - restricted_exact_fids.push(fid); + restricted_fids.exact.push(fid); } else { - restricted_tolerant_fids.push(fid); + restricted_fids.tolerant.push(fid); }; } - self.restricted_exact_fids = (!contains_wildcard).then_some(restricted_exact_fids); - self.restricted_tolerant_fids = (!contains_wildcard).then_some(restricted_tolerant_fids); + self.restricted_fids = (!contains_wildcard).then_some(restricted_fids); Ok(()) } @@ -156,6 +153,18 @@ impl Word { } } +#[derive(Debug, Clone, Default)] +pub struct RestrictedFids { + pub tolerant: Vec, + pub exact: Vec, +} + +impl RestrictedFids { + pub fn contains(&self, fid: &FieldId) -> bool { + self.tolerant.contains(fid) || self.exact.contains(fid) + } +} + /// Apply the [`TermsMatchingStrategy`] to the query graph and resolve it. fn resolve_maximally_reduced_query_graph( ctx: &mut SearchContext,