diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs new file mode 100644 index 000000000..15f9f7873 --- /dev/null +++ b/milli/src/search/new/db_cache.rs @@ -0,0 +1,119 @@ +use std::collections::{hash_map::Entry, HashMap}; + +use heed::{types::ByteSlice, RoTxn}; + +use crate::{Index, Result}; + +#[derive(Default)] +pub struct DatabaseCache<'transaction> { + pub word_pair_proximity_docids: HashMap<(u8, String, String), Option<&'transaction [u8]>>, + pub word_prefix_pair_proximity_docids: + HashMap<(u8, String, String), Option<&'transaction [u8]>>, + pub word_docids: HashMap>, + pub exact_word_docids: HashMap>, + pub word_prefix_docids: HashMap>, +} +impl<'transaction> DatabaseCache<'transaction> { + pub fn get_word_docids( + &mut self, + index: &Index, + txn: &'transaction RoTxn, + word: &str, + ) -> Result> { + let bitmap_ptr = match self.word_docids.entry(word.to_owned()) { + Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(), + Entry::Vacant(entry) => { + let bitmap_ptr = index.word_docids.remap_data_type::().get(txn, word)?; + entry.insert(bitmap_ptr); + bitmap_ptr + } + }; + Ok(bitmap_ptr) + } + pub fn get_prefix_docids( + &mut self, + index: &Index, + txn: &'transaction RoTxn, + prefix: &str, + ) -> Result> { + // In the future, this will be a frozen roaring bitmap + let bitmap_ptr = match self.word_prefix_docids.entry(prefix.to_owned()) { + Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(), + Entry::Vacant(entry) => { + let bitmap_ptr = + index.word_prefix_docids.remap_data_type::().get(txn, prefix)?; + entry.insert(bitmap_ptr); + bitmap_ptr + } + }; + Ok(bitmap_ptr) + } + + pub fn get_word_pair_proximity_docids( + &mut self, + index: &Index, + txn: &'transaction RoTxn, + word1: &str, + word2: &str, + proximity: u8, + ) -> Result> { + let key = (proximity, word1.to_owned(), word2.to_owned()); + match self.word_pair_proximity_docids.entry(key.clone()) { + Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()), + Entry::Vacant(entry) => { + // Note that now, we really want to do a prefix iter over (w1, w2) to get all the possible proximities + // but oh well + // + // Actually, we shouldn'transaction greedily access this DB at all + // a DB (w1, w2) -> [proximities] would be much better + // We could even have a DB that is (w1) -> set of words such that (w1, w2) are in proximity + // And if we worked with words encoded as integers, the set of words could be a roaring bitmap + // Then, to find all the proximities between two list of words, we'd do: + + // inputs: + // - words1 (roaring bitmap) + // - words2 (roaring bitmap) + // output: + // - [(word1, word2, [proximities])] + // algo: + // let mut ouput = vec![]; + // for word1 in words1 { + // let all_words_in_proximity_of_w1 = pair_words_db.get(word1); + // let words_in_proximity_of_w1 = all_words_in_proximity_of_w1 & words2; + // for word2 in words_in_proximity_of_w1 { + // let proximties = prox_db.get(word1, word2); + // output.push(word1, word2, proximities); + // } + // } + let bitmap_ptr = index + .word_pair_proximity_docids + .remap_data_type::() + .get(txn, &(key.0, key.1.as_str(), key.2.as_str()))?; + entry.insert(bitmap_ptr); + Ok(bitmap_ptr) + } + } + } + + pub fn get_word_prefix_pair_proximity_docids( + &mut self, + index: &Index, + txn: &'transaction RoTxn, + word1: &str, + prefix2: &str, + proximity: u8, + ) -> Result> { + let key = (proximity, word1.to_owned(), prefix2.to_owned()); + match self.word_prefix_pair_proximity_docids.entry(key.clone()) { + Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()), + Entry::Vacant(entry) => { + let bitmap_ptr = index + .word_prefix_pair_proximity_docids + .remap_data_type::() + .get(txn, &(key.0, key.1.as_str(), key.2.as_str()))?; + entry.insert(bitmap_ptr); + Ok(bitmap_ptr) + } + } + } +}