mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-05-17 17:53:57 +02:00
reduce the number of computed prefixes
This commit is contained in:
parent
bb2e9419d3
commit
934b73142d
@ -37,12 +37,12 @@ pub struct DatabaseCache<'ctx> {
|
|||||||
|
|
||||||
pub words_fst: Option<fst::Set<Cow<'ctx, [u8]>>>,
|
pub words_fst: Option<fst::Set<Cow<'ctx, [u8]>>>,
|
||||||
pub word_position_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
|
pub word_position_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
|
||||||
pub word_prefix_position_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
|
pub word_prefix_position_docids: FxHashMap<(Interned<String>, u16), Option<RoaringBitmap>>,
|
||||||
pub word_positions: FxHashMap<Interned<String>, Vec<u16>>,
|
pub word_positions: FxHashMap<Interned<String>, Vec<u16>>,
|
||||||
pub word_prefix_positions: FxHashMap<Interned<String>, Vec<u16>>,
|
pub word_prefix_positions: FxHashMap<Interned<String>, Vec<u16>>,
|
||||||
|
|
||||||
pub word_fid_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
|
pub word_fid_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
|
||||||
pub word_prefix_fid_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
|
pub word_prefix_fid_docids: FxHashMap<(Interned<String>, u16), Option<RoaringBitmap>>,
|
||||||
pub word_fids: FxHashMap<Interned<String>, Vec<u16>>,
|
pub word_fids: FxHashMap<Interned<String>, Vec<u16>>,
|
||||||
pub word_prefix_fids: FxHashMap<Interned<String>, Vec<u16>>,
|
pub word_prefix_fids: FxHashMap<Interned<String>, Vec<u16>>,
|
||||||
}
|
}
|
||||||
@ -562,14 +562,46 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
DatabaseCache::get_value(
|
let cache = &mut self.db_cache.word_prefix_fid_docids;
|
||||||
self.txn,
|
let prefix_db = &self.index.word_prefix_fid_docids;
|
||||||
(word_prefix, fid),
|
let db = &self.index.word_fid_docids;
|
||||||
&(self.word_interner.get(word_prefix).as_str(), fid),
|
if let Entry::Vacant(entry) = cache.entry((word_prefix, fid)) {
|
||||||
&mut self.db_cache.word_prefix_fid_docids,
|
let word_prefix_bytes = self.word_interner.get(word_prefix).as_bytes().to_owned();
|
||||||
universe,
|
let word_prefix_str = std::str::from_utf8(&word_prefix_bytes).unwrap();
|
||||||
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
|
match prefix_db.get(self.txn, &(word_prefix_str, fid))? {
|
||||||
)
|
Some(mut bitmap) => {
|
||||||
|
if let Some(universe) = universe {
|
||||||
|
bitmap &= universe;
|
||||||
|
}
|
||||||
|
entry.insert(Some(bitmap));
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
let mut key = word_prefix_bytes.clone();
|
||||||
|
key.push(0);
|
||||||
|
let remap_key_type = db
|
||||||
|
.remap_key_type::<Bytes>()
|
||||||
|
.prefix_iter(self.txn, &key)?
|
||||||
|
.remap_key_type::<StrBEU16Codec>();
|
||||||
|
|
||||||
|
let mut bitmap = RoaringBitmap::new();
|
||||||
|
for result in remap_key_type {
|
||||||
|
let ((_, pos), value) = result?;
|
||||||
|
|
||||||
|
if pos == fid {
|
||||||
|
if let Some(universe) = universe {
|
||||||
|
bitmap |= value & universe;
|
||||||
|
} else {
|
||||||
|
bitmap |= value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
entry.insert(Some(bitmap));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(cache.get(&(word_prefix, fid)).unwrap().clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_fids(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
|
pub fn get_db_word_fids(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
|
||||||
@ -605,6 +637,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
let mut key = self.word_interner.get(word_prefix).as_bytes().to_owned();
|
let mut key = self.word_interner.get(word_prefix).as_bytes().to_owned();
|
||||||
key.push(0);
|
key.push(0);
|
||||||
let mut fids = vec![];
|
let mut fids = vec![];
|
||||||
|
// TODO: This is no longer exhaustive; we should iterate over all fids.
|
||||||
let remap_key_type = self
|
let remap_key_type = self
|
||||||
.index
|
.index
|
||||||
.word_prefix_fid_docids
|
.word_prefix_fid_docids
|
||||||
@ -612,11 +645,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
.prefix_iter(self.txn, &key)?
|
.prefix_iter(self.txn, &key)?
|
||||||
.remap_key_type::<StrBEU16Codec>();
|
.remap_key_type::<StrBEU16Codec>();
|
||||||
for result in remap_key_type {
|
for result in remap_key_type {
|
||||||
let ((_, fid), value) = result?;
|
let ((_, fid), _value) = result?;
|
||||||
// filling other caches to avoid searching for them again
|
|
||||||
self.db_cache
|
|
||||||
.word_prefix_fid_docids
|
|
||||||
.insert((word_prefix, fid), Some(Cow::Borrowed(value)));
|
|
||||||
fids.push(fid);
|
fids.push(fid);
|
||||||
}
|
}
|
||||||
entry.insert(fids.clone());
|
entry.insert(fids.clone());
|
||||||
@ -648,14 +677,46 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
word_prefix: Interned<String>,
|
word_prefix: Interned<String>,
|
||||||
position: u16,
|
position: u16,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
DatabaseCache::get_value(
|
let cache = &mut self.db_cache.word_prefix_position_docids;
|
||||||
self.txn,
|
let prefix_db = &self.index.word_prefix_position_docids;
|
||||||
(word_prefix, position),
|
let db = &self.index.word_position_docids;
|
||||||
&(self.word_interner.get(word_prefix).as_str(), position),
|
if let Entry::Vacant(entry) = cache.entry((word_prefix, position)) {
|
||||||
&mut self.db_cache.word_prefix_position_docids,
|
let word_prefix_bytes = self.word_interner.get(word_prefix).as_bytes().to_owned();
|
||||||
universe,
|
let word_prefix_str = std::str::from_utf8(&word_prefix_bytes).unwrap();
|
||||||
self.index.word_prefix_position_docids.remap_data_type::<Bytes>(),
|
match prefix_db.get(self.txn, &(word_prefix_str, position))? {
|
||||||
)
|
Some(mut bitmap) => {
|
||||||
|
if let Some(universe) = universe {
|
||||||
|
bitmap &= universe;
|
||||||
|
}
|
||||||
|
entry.insert(Some(bitmap));
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
let mut key = word_prefix_bytes.clone();
|
||||||
|
key.push(0);
|
||||||
|
let remap_key_type = db
|
||||||
|
.remap_key_type::<Bytes>()
|
||||||
|
.prefix_iter(self.txn, &key)?
|
||||||
|
.remap_key_type::<StrBEU16Codec>();
|
||||||
|
|
||||||
|
let mut bitmap = RoaringBitmap::new();
|
||||||
|
for result in remap_key_type {
|
||||||
|
let ((_, pos), value) = result?;
|
||||||
|
|
||||||
|
if pos == position {
|
||||||
|
if let Some(universe) = universe {
|
||||||
|
bitmap |= value & universe;
|
||||||
|
} else {
|
||||||
|
bitmap |= value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
entry.insert(Some(bitmap));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(cache.get(&(word_prefix, position)).unwrap().clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_positions(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
|
pub fn get_db_word_positions(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
|
||||||
@ -696,6 +757,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
let mut key = self.word_interner.get(word_prefix).as_bytes().to_owned();
|
let mut key = self.word_interner.get(word_prefix).as_bytes().to_owned();
|
||||||
key.push(0);
|
key.push(0);
|
||||||
let mut positions = vec![];
|
let mut positions = vec![];
|
||||||
|
// TODO: This is no longer exhaustive; we should iterate over all positions.
|
||||||
let remap_key_type = self
|
let remap_key_type = self
|
||||||
.index
|
.index
|
||||||
.word_prefix_position_docids
|
.word_prefix_position_docids
|
||||||
@ -703,11 +765,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
.prefix_iter(self.txn, &key)?
|
.prefix_iter(self.txn, &key)?
|
||||||
.remap_key_type::<StrBEU16Codec>();
|
.remap_key_type::<StrBEU16Codec>();
|
||||||
for result in remap_key_type {
|
for result in remap_key_type {
|
||||||
let ((_, position), value) = result?;
|
let ((_, position), _value) = result?;
|
||||||
// filling other caches to avoid searching for them again
|
|
||||||
self.db_cache
|
|
||||||
.word_prefix_position_docids
|
|
||||||
.insert((word_prefix, position), Some(Cow::Borrowed(value)));
|
|
||||||
positions.push(position);
|
positions.push(position);
|
||||||
}
|
}
|
||||||
entry.insert(positions.clone());
|
entry.insert(positions.clone());
|
||||||
|
@ -291,6 +291,9 @@ impl<'a, 'rtxn> FrozenPrefixIntegerBitmaps<'a, 'rtxn> {
|
|||||||
let (_word, pos) = StrBEU16Codec::bytes_decode(key).map_err(Error::Decoding)?;
|
let (_word, pos) = StrBEU16Codec::bytes_decode(key).map_err(Error::Decoding)?;
|
||||||
positions.entry(pos).or_insert_with(Vec::new).push(bytes);
|
positions.entry(pos).or_insert_with(Vec::new).push(bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We remove all the positions that have 100 bitmaps or fewer.
|
||||||
|
positions.retain(|_, bitmaps| bitmaps.len() > 100);
|
||||||
assert!(prefixes_bitmaps.insert(prefix.as_str(), positions).is_none());
|
assert!(prefixes_bitmaps.insert(prefix.as_str(), positions).is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user