From d6c2ee15a9c702a54ccc3c9e825c562b0c7b56f9 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 28 Nov 2023 14:55:29 +0100 Subject: [PATCH] Filter on attributes before computing the docids when attribute restriction is on --- milli/src/search/new/db_cache.rs | 70 ++++++++++++++++++++++++-------- milli/src/search/new/mod.rs | 19 ++++++--- 2 files changed, 66 insertions(+), 23 deletions(-) diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index e0a2ba3cf..d7ef031bb 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -154,7 +154,7 @@ impl<'ctx> SearchContext<'ctx> { /// Retrieve or insert the given value in the `word_docids` database. fn get_db_word_docids(&mut self, word: Interned) -> Result> { - match &self.restricted_fids { + match &self.restricted_tolerant_fids { Some(restricted_fids) => { let interned = self.word_interner.get(word).as_str(); let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); @@ -182,13 +182,28 @@ impl<'ctx> SearchContext<'ctx> { &mut self, word: Interned, ) -> Result> { - DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( - self.txn, - word, - self.word_interner.get(word).as_str(), - &mut self.db_cache.exact_word_docids, - self.index.exact_word_docids.remap_data_type::(), - ) + match &self.restricted_exact_fids { + Some(restricted_fids) => { + let interned = self.word_interner.get(word).as_str(); + let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); + + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + word, + &keys[..], + &mut self.db_cache.exact_word_docids, + self.index.word_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) + } + None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( + self.txn, + word, + self.word_interner.get(word).as_str(), + &mut self.db_cache.exact_word_docids, + self.index.exact_word_docids.remap_data_type::(), + ), + } } pub fn word_prefix_docids(&mut self, prefix: Word) -> Result> { @@ -216,7 +231,7 @@ impl<'ctx> SearchContext<'ctx> { &mut self, prefix: Interned, ) -> Result> { - match &self.restricted_fids { + match &self.restricted_tolerant_fids { Some(restricted_fids) => { let interned = self.word_interner.get(prefix).as_str(); let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); @@ -244,13 +259,28 @@ impl<'ctx> SearchContext<'ctx> { &mut self, prefix: Interned, ) -> Result> { - DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( - self.txn, - prefix, - self.word_interner.get(prefix).as_str(), - &mut self.db_cache.exact_word_prefix_docids, - self.index.exact_word_prefix_docids.remap_data_type::(), - ) + match &self.restricted_exact_fids { + Some(restricted_fids) => { + let interned = self.word_interner.get(prefix).as_str(); + let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); + + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + prefix, + &keys[..], + &mut self.db_cache.exact_word_prefix_docids, + self.index.word_prefix_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) + } + None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>( + self.txn, + prefix, + self.word_interner.get(prefix).as_str(), + &mut self.db_cache.exact_word_prefix_docids, + self.index.exact_word_prefix_docids.remap_data_type::(), + ), + } } pub fn get_db_word_pair_proximity_docids( @@ -334,7 +364,9 @@ impl<'ctx> SearchContext<'ctx> { fid: u16, ) -> Result> { // if the requested fid isn't in the restricted list, return None. - if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { + if self.restricted_tolerant_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) + && self.restricted_exact_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) + { return Ok(None); } @@ -353,7 +385,9 @@ impl<'ctx> SearchContext<'ctx> { fid: u16, ) -> Result> { // if the requested fid isn't in the restricted list, return None. - if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { + if self.restricted_tolerant_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) + && self.restricted_exact_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) + { return Ok(None); } diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 6ceb78223..56c55d031 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -63,7 +63,8 @@ pub struct SearchContext<'ctx> { pub phrase_interner: DedupInterner, pub term_interner: Interner, pub phrase_docids: PhraseDocIdsCache, - pub restricted_fids: Option>, + pub restricted_tolerant_fids: Option>, + pub restricted_exact_fids: Option>, } impl<'ctx> SearchContext<'ctx> { @@ -76,15 +77,18 @@ impl<'ctx> SearchContext<'ctx> { phrase_interner: <_>::default(), term_interner: <_>::default(), phrase_docids: <_>::default(), - restricted_fids: None, + restricted_tolerant_fids: None, + restricted_exact_fids: None, } } pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> { let fids_map = self.index.fields_ids_map(self.txn)?; let searchable_names = self.index.searchable_fields(self.txn)?; + let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?; - let mut restricted_fids = Vec::new(); + let mut restricted_exact_fids = Vec::new(); + let mut restricted_tolerant_fids = Vec::new(); let mut contains_wildcard = false; for field_name in searchable_attributes { if field_name == "*" { @@ -123,10 +127,15 @@ impl<'ctx> SearchContext<'ctx> { } }; - restricted_fids.push(fid); + if exact_attributes_ids.contains(&fid) { + restricted_exact_fids.push(fid); + } else { + restricted_tolerant_fids.push(fid); + }; } - self.restricted_fids = (!contains_wildcard).then_some(restricted_fids); + self.restricted_exact_fids = (!contains_wildcard).then_some(restricted_exact_fids); + self.restricted_tolerant_fids = (!contains_wildcard).then_some(restricted_tolerant_fids); Ok(()) }