mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 06:44:27 +01:00
Introduce a DatabaseCache to memorize the addresses of LMDB values
This commit is contained in:
parent
a83007c013
commit
5065d8b0c1
119
milli/src/search/new/db_cache.rs
Normal file
119
milli/src/search/new/db_cache.rs
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
use std::collections::{hash_map::Entry, HashMap};
|
||||||
|
|
||||||
|
use heed::{types::ByteSlice, RoTxn};
|
||||||
|
|
||||||
|
use crate::{Index, Result};
|
||||||
|
|
||||||
|
/// Per-transaction memo of raw values read from the index databases.
///
/// Every map associates a lookup key with the `Option<&[u8]>` that was
/// obtained for it. The byte slices are borrowed for `'transaction`, so the
/// cache stores references, not copies of the data. A stored `None` is a
/// cached result as well.
#[derive(Default)]
pub struct DatabaseCache<'transaction> {
    /// Values of the word-pair proximity database, keyed by
    /// `(proximity, word1, word2)`.
    pub word_pair_proximity_docids: HashMap<(u8, String, String), Option<&'transaction [u8]>>,
    /// Values of the word-prefix-pair proximity database, keyed by
    /// `(proximity, word1, prefix2)`.
    pub word_prefix_pair_proximity_docids:
        HashMap<(u8, String, String), Option<&'transaction [u8]>>,
    /// Values of the word docids database, keyed by the word.
    pub word_docids: HashMap<String, Option<&'transaction [u8]>>,
    /// Keyed by a `String`, presumably the exact word.
    // NOTE(review): no accessor visible in this file fills this map — confirm intended use.
    pub exact_word_docids: HashMap<String, Option<&'transaction [u8]>>,
    /// Values of the word-prefix docids database, keyed by the prefix.
    pub word_prefix_docids: HashMap<String, Option<&'transaction [u8]>>,
}
|
||||||
|
impl<'transaction> DatabaseCache<'transaction> {
|
||||||
|
pub fn get_word_docids(
|
||||||
|
&mut self,
|
||||||
|
index: &Index,
|
||||||
|
txn: &'transaction RoTxn,
|
||||||
|
word: &str,
|
||||||
|
) -> Result<Option<&'transaction [u8]>> {
|
||||||
|
let bitmap_ptr = match self.word_docids.entry(word.to_owned()) {
|
||||||
|
Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(),
|
||||||
|
Entry::Vacant(entry) => {
|
||||||
|
let bitmap_ptr = index.word_docids.remap_data_type::<ByteSlice>().get(txn, word)?;
|
||||||
|
entry.insert(bitmap_ptr);
|
||||||
|
bitmap_ptr
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(bitmap_ptr)
|
||||||
|
}
|
||||||
|
pub fn get_prefix_docids(
|
||||||
|
&mut self,
|
||||||
|
index: &Index,
|
||||||
|
txn: &'transaction RoTxn,
|
||||||
|
prefix: &str,
|
||||||
|
) -> Result<Option<&'transaction [u8]>> {
|
||||||
|
// In the future, this will be a frozen roaring bitmap
|
||||||
|
let bitmap_ptr = match self.word_prefix_docids.entry(prefix.to_owned()) {
|
||||||
|
Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(),
|
||||||
|
Entry::Vacant(entry) => {
|
||||||
|
let bitmap_ptr =
|
||||||
|
index.word_prefix_docids.remap_data_type::<ByteSlice>().get(txn, prefix)?;
|
||||||
|
entry.insert(bitmap_ptr);
|
||||||
|
bitmap_ptr
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(bitmap_ptr)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_word_pair_proximity_docids(
|
||||||
|
&mut self,
|
||||||
|
index: &Index,
|
||||||
|
txn: &'transaction RoTxn,
|
||||||
|
word1: &str,
|
||||||
|
word2: &str,
|
||||||
|
proximity: u8,
|
||||||
|
) -> Result<Option<&'transaction [u8]>> {
|
||||||
|
let key = (proximity, word1.to_owned(), word2.to_owned());
|
||||||
|
match self.word_pair_proximity_docids.entry(key.clone()) {
|
||||||
|
Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()),
|
||||||
|
Entry::Vacant(entry) => {
|
||||||
|
// Note that now, we really want to do a prefix iter over (w1, w2) to get all the possible proximities
|
||||||
|
// but oh well
|
||||||
|
//
|
||||||
|
// Actually, we shouldn'transaction greedily access this DB at all
|
||||||
|
// a DB (w1, w2) -> [proximities] would be much better
|
||||||
|
// We could even have a DB that is (w1) -> set of words such that (w1, w2) are in proximity
|
||||||
|
// And if we worked with words encoded as integers, the set of words could be a roaring bitmap
|
||||||
|
// Then, to find all the proximities between two list of words, we'd do:
|
||||||
|
|
||||||
|
// inputs:
|
||||||
|
// - words1 (roaring bitmap)
|
||||||
|
// - words2 (roaring bitmap)
|
||||||
|
// output:
|
||||||
|
// - [(word1, word2, [proximities])]
|
||||||
|
// algo:
|
||||||
|
// let mut ouput = vec![];
|
||||||
|
// for word1 in words1 {
|
||||||
|
// let all_words_in_proximity_of_w1 = pair_words_db.get(word1);
|
||||||
|
// let words_in_proximity_of_w1 = all_words_in_proximity_of_w1 & words2;
|
||||||
|
// for word2 in words_in_proximity_of_w1 {
|
||||||
|
// let proximties = prox_db.get(word1, word2);
|
||||||
|
// output.push(word1, word2, proximities);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
let bitmap_ptr = index
|
||||||
|
.word_pair_proximity_docids
|
||||||
|
.remap_data_type::<ByteSlice>()
|
||||||
|
.get(txn, &(key.0, key.1.as_str(), key.2.as_str()))?;
|
||||||
|
entry.insert(bitmap_ptr);
|
||||||
|
Ok(bitmap_ptr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_word_prefix_pair_proximity_docids(
|
||||||
|
&mut self,
|
||||||
|
index: &Index,
|
||||||
|
txn: &'transaction RoTxn,
|
||||||
|
word1: &str,
|
||||||
|
prefix2: &str,
|
||||||
|
proximity: u8,
|
||||||
|
) -> Result<Option<&'transaction [u8]>> {
|
||||||
|
let key = (proximity, word1.to_owned(), prefix2.to_owned());
|
||||||
|
match self.word_prefix_pair_proximity_docids.entry(key.clone()) {
|
||||||
|
Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()),
|
||||||
|
Entry::Vacant(entry) => {
|
||||||
|
let bitmap_ptr = index
|
||||||
|
.word_prefix_pair_proximity_docids
|
||||||
|
.remap_data_type::<ByteSlice>()
|
||||||
|
.get(txn, &(key.0, key.1.as_str(), key.2.as_str()))?;
|
||||||
|
entry.insert(bitmap_ptr);
|
||||||
|
Ok(bitmap_ptr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user