From dd12d441343f7b2627da4cfe7377c428e0e182f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 21 Feb 2023 15:10:22 +0100 Subject: [PATCH] Support swapped word pairs in new proximity ranking rule impl --- milli/src/search/new/db_cache.rs | 23 +++++++++++++++++++ .../proximity/compute_docids.rs | 5 ++++ .../new/ranking_rule_graph/proximity/mod.rs | 2 ++ 3 files changed, 30 insertions(+) diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index ae7cb9b91..4232cadaa 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -11,6 +11,8 @@ pub struct DatabaseCache<'transaction> { pub word_pair_proximity_docids: FxHashMap<(u8, String, String), Option<&'transaction [u8]>>, pub word_prefix_pair_proximity_docids: FxHashMap<(u8, String, String), Option<&'transaction [u8]>>, + pub prefix_word_pair_proximity_docids: + FxHashMap<(u8, String, String), Option<&'transaction [u8]>>, pub word_docids: FxHashMap<String, Option<&'transaction [u8]>>, pub exact_word_docids: FxHashMap<String, Option<&'transaction [u8]>>, pub word_prefix_docids: FxHashMap<String, Option<&'transaction [u8]>>, @@ -115,4 +117,25 @@ impl<'transaction> DatabaseCache<'transaction> { } } } + pub fn get_prefix_word_pair_proximity_docids( + &mut self, + index: &Index, + txn: &'transaction RoTxn, + word1: &str, + prefix2: &str, + proximity: u8, + ) -> Result<Option<&'transaction [u8]>> { + let key = (proximity, prefix2.to_owned(), word1.to_owned()); + match self.prefix_word_pair_proximity_docids.entry(key) { + Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()), + Entry::Vacant(entry) => { + let bitmap_ptr = index + .prefix_word_pair_proximity_docids + .remap_data_type::<ByteSlice>() + .get(txn, &(proximity, prefix2, word1))?; + entry.insert(bitmap_ptr); + Ok(bitmap_ptr) + } + } + } } diff --git a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs index 51c6d6ad5..908f50ef6 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs +++ 
b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs @@ -20,6 +20,11 @@ pub fn compute_docids<'transaction>( } WordPair::WordPrefix { left, right_prefix } => db_cache .get_word_prefix_pair_proximity_docids(index, txn, left, right_prefix, *proximity), + WordPair::WordsSwapped { left, right } => { + db_cache.get_word_pair_proximity_docids(index, txn, left, right, *proximity) + } + WordPair::WordPrefixSwapped { left, right_prefix } => db_cache + .get_prefix_word_pair_proximity_docids(index, txn, left, right_prefix, *proximity), }?; let bitmap = bytes.map(CboRoaringBitmapCodec::deserialize_from).transpose()?.unwrap_or_default(); diff --git a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs index 16c2acf1f..3b9470be2 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs @@ -13,7 +13,9 @@ use crate::{Index, Result}; pub enum WordPair { // TODO: add WordsSwapped and WordPrefixSwapped case Words { left: String, right: String }, + WordsSwapped { left: String, right: String }, WordPrefix { left: String, right_prefix: String }, + WordPrefixSwapped { left: String, right_prefix: String }, } pub struct ProximityEdge {