mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 00:34:26 +01:00
Use the sorter cache when collection prefix docids
This commit is contained in:
parent
a3beaa90c5
commit
0e08906fcb
@ -140,6 +140,10 @@ where
|
||||
self.sorter.insert(key, value_writer.into_inner().unwrap())
|
||||
}
|
||||
|
||||
pub fn direct_insert(&mut self, key: &[u8], val: &[u8]) -> Result<(), grenad::Error<U>> {
|
||||
self.sorter.insert(key, val)
|
||||
}
|
||||
|
||||
pub fn into_sorter(mut self) -> Result<grenad::Sorter<MF>, grenad::Error<U>> {
|
||||
let default_lru = LruCache::new(NonZeroUsize::MIN);
|
||||
for (key, deladd) in mem::replace(&mut self.cache, default_lru) {
|
||||
@ -155,10 +159,6 @@ pub struct DelAddRoaringBitmap {
|
||||
}
|
||||
|
||||
impl DelAddRoaringBitmap {
|
||||
fn new_del_add(bitmap: RoaringBitmap) -> Self {
|
||||
DelAddRoaringBitmap { del: Some(bitmap.clone()), add: Some(bitmap) }
|
||||
}
|
||||
|
||||
fn new_del_add_u32(n: u32) -> Self {
|
||||
DelAddRoaringBitmap {
|
||||
del: Some(RoaringBitmap::from([n])),
|
||||
|
@ -1,4 +1,4 @@
|
||||
mod cache;
|
||||
pub mod cache;
|
||||
mod enrich;
|
||||
mod extract;
|
||||
mod helpers;
|
||||
|
@ -1,11 +1,13 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
use grenad::CompressionType;
|
||||
use heed::types::{Bytes, Str};
|
||||
use heed::types::Str;
|
||||
use heed::Database;
|
||||
|
||||
use super::index_documents::cache::SorterCacheDelAddCboRoaringBitmap;
|
||||
use super::index_documents::REDIS_CLIENT;
|
||||
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvWriterDelAdd};
|
||||
use crate::update::del_add::deladd_serialize_add_side;
|
||||
use crate::update::index_documents::{
|
||||
create_sorter, merge_deladd_cbo_roaring_bitmaps,
|
||||
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
|
||||
@ -53,11 +55,9 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
|
||||
common_prefix_fst_words: &[&[String]],
|
||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||
) -> Result<()> {
|
||||
let mut conn = REDIS_CLIENT.get_connection().unwrap();
|
||||
|
||||
// It is forbidden to keep a mutable reference into the database
|
||||
// and write into it at the same time, therefore we write into another file.
|
||||
let mut prefix_docids_sorter = create_sorter(
|
||||
let prefix_docids_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Unstable,
|
||||
merge_deladd_cbo_roaring_bitmaps,
|
||||
self.chunk_compression_type,
|
||||
@ -65,6 +65,11 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
|
||||
self.max_nb_chunks,
|
||||
self.max_memory,
|
||||
);
|
||||
let mut cached_prefix_docids_sorter = SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
|
||||
NonZeroUsize::new(200).unwrap(),
|
||||
prefix_docids_sorter,
|
||||
REDIS_CLIENT.get_connection().unwrap(),
|
||||
);
|
||||
|
||||
if !common_prefix_fst_words.is_empty() {
|
||||
let mut current_prefixes: Option<&&[String]> = None;
|
||||
@ -76,8 +81,7 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
|
||||
_otherwise => {
|
||||
write_prefixes_in_sorter(
|
||||
&mut prefixes_cache,
|
||||
&mut prefix_docids_sorter,
|
||||
&mut conn,
|
||||
&mut cached_prefix_docids_sorter,
|
||||
)?;
|
||||
common_prefix_fst_words
|
||||
.iter()
|
||||
@ -100,22 +104,17 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
|
||||
}
|
||||
}
|
||||
|
||||
write_prefixes_in_sorter(&mut prefixes_cache, &mut prefix_docids_sorter, &mut conn)?;
|
||||
write_prefixes_in_sorter(&mut prefixes_cache, &mut cached_prefix_docids_sorter)?;
|
||||
}
|
||||
|
||||
// We fetch the docids associated to the newly added word prefix fst only.
|
||||
let db = self.word_docids.remap_data_type::<Bytes>();
|
||||
let mut buffer = Vec::new();
|
||||
let db = self.word_docids.lazily_decode_data();
|
||||
for prefix in new_prefix_fst_words {
|
||||
let prefix = std::str::from_utf8(prefix.as_bytes())?;
|
||||
for result in db.prefix_iter(self.wtxn, prefix)? {
|
||||
let (_word, data) = result?;
|
||||
buffer.clear();
|
||||
let mut writer = KvWriterDelAdd::new(&mut buffer);
|
||||
writer.insert(DelAdd::Addition, data)?;
|
||||
|
||||
redis::cmd("INCR").arg(prefix.as_bytes()).query::<usize>(&mut conn).unwrap();
|
||||
prefix_docids_sorter.insert(prefix, writer.into_inner()?)?;
|
||||
let (_word, lazy_data) = result?;
|
||||
cached_prefix_docids_sorter
|
||||
.insert_add(prefix.as_bytes(), lazy_data.decode().unwrap())?;
|
||||
}
|
||||
}
|
||||
|
||||
@ -133,7 +132,7 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
|
||||
|
||||
// We finally write the word prefix docids into the LMDB database.
|
||||
write_sorter_into_database(
|
||||
prefix_docids_sorter,
|
||||
cached_prefix_docids_sorter.into_sorter()?,
|
||||
&self.word_prefix_docids,
|
||||
self.wtxn,
|
||||
database_is_empty,
|
||||
@ -147,14 +146,12 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
|
||||
|
||||
fn write_prefixes_in_sorter(
|
||||
prefixes: &mut HashMap<Vec<u8>, Vec<Vec<u8>>>,
|
||||
sorter: &mut grenad::Sorter<MergeFn>,
|
||||
conn: &mut redis::Connection,
|
||||
sorter: &mut SorterCacheDelAddCboRoaringBitmap<20, MergeFn>,
|
||||
) -> Result<()> {
|
||||
for (key, data_slices) in prefixes.drain() {
|
||||
for data in data_slices {
|
||||
if valid_lmdb_key(&key) {
|
||||
redis::cmd("INCR").arg(key.as_slice()).query::<usize>(conn).unwrap();
|
||||
sorter.insert(&key, data)?;
|
||||
sorter.direct_insert(&key, &data)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::num::NonZeroUsize;
|
||||
use std::str;
|
||||
|
||||
use grenad::CompressionType;
|
||||
@ -9,7 +10,8 @@ use tracing::debug;
|
||||
use crate::error::SerializationError;
|
||||
use crate::heed_codec::StrBEU16Codec;
|
||||
use crate::index::main_key::WORDS_PREFIXES_FST_KEY;
|
||||
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvWriterDelAdd};
|
||||
use crate::update::del_add::deladd_serialize_add_side;
|
||||
use crate::update::index_documents::cache::SorterCacheDelAddCboRoaringBitmap;
|
||||
use crate::update::index_documents::{
|
||||
create_sorter, merge_deladd_cbo_roaring_bitmaps,
|
||||
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
|
||||
@ -59,9 +61,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
||||
) -> Result<()> {
|
||||
debug!("Computing and writing the word levels integers docids into LMDB on disk...");
|
||||
|
||||
let mut conn = REDIS_CLIENT.get_connection().unwrap();
|
||||
|
||||
let mut prefix_integer_docids_sorter = create_sorter(
|
||||
let prefix_integer_docids_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Unstable,
|
||||
merge_deladd_cbo_roaring_bitmaps,
|
||||
self.chunk_compression_type,
|
||||
@ -69,6 +69,12 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
||||
self.max_nb_chunks,
|
||||
self.max_memory,
|
||||
);
|
||||
let mut cached_prefix_integer_docids_sorter =
|
||||
SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
|
||||
NonZeroUsize::new(200).unwrap(),
|
||||
prefix_integer_docids_sorter,
|
||||
REDIS_CLIENT.get_connection().unwrap(),
|
||||
);
|
||||
|
||||
if !common_prefix_fst_words.is_empty() {
|
||||
// We fetch all the new common prefixes between the previous and new prefix fst.
|
||||
@ -86,8 +92,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
||||
_otherwise => {
|
||||
write_prefixes_in_sorter(
|
||||
&mut prefixes_cache,
|
||||
&mut prefix_integer_docids_sorter,
|
||||
&mut conn,
|
||||
&mut cached_prefix_integer_docids_sorter,
|
||||
)?;
|
||||
common_prefix_fst_words
|
||||
.iter()
|
||||
@ -115,14 +120,12 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
||||
|
||||
write_prefixes_in_sorter(
|
||||
&mut prefixes_cache,
|
||||
&mut prefix_integer_docids_sorter,
|
||||
&mut conn,
|
||||
&mut cached_prefix_integer_docids_sorter,
|
||||
)?;
|
||||
}
|
||||
|
||||
// We fetch the docids associated to the newly added word prefix fst only.
|
||||
let db = self.word_database.remap_data_type::<Bytes>();
|
||||
let mut buffer = Vec::new();
|
||||
let db = self.word_database.lazily_decode_data();
|
||||
for prefix_bytes in new_prefix_fst_words {
|
||||
let prefix = str::from_utf8(prefix_bytes.as_bytes()).map_err(|_| {
|
||||
SerializationError::Decoding { db_name: Some(WORDS_PREFIXES_FST_KEY) }
|
||||
@ -134,16 +137,12 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
||||
.prefix_iter(self.wtxn, prefix_bytes.as_bytes())?
|
||||
.remap_key_type::<StrBEU16Codec>();
|
||||
for result in iter {
|
||||
let ((word, pos), data) = result?;
|
||||
let ((word, pos), lazy_data) = result?;
|
||||
if word.starts_with(prefix) {
|
||||
let key = (prefix, pos);
|
||||
let bytes = StrBEU16Codec::bytes_encode(&key).unwrap();
|
||||
|
||||
buffer.clear();
|
||||
let mut writer = KvWriterDelAdd::new(&mut buffer);
|
||||
writer.insert(DelAdd::Addition, data)?;
|
||||
redis::cmd("INCR").arg(bytes.as_ref()).query::<usize>(&mut conn).unwrap();
|
||||
prefix_integer_docids_sorter.insert(bytes, writer.into_inner()?)?;
|
||||
cached_prefix_integer_docids_sorter
|
||||
.insert_add(&bytes, lazy_data.decode().unwrap())?;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -167,7 +166,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
||||
|
||||
// We finally write all the word prefix integer docids into the LMDB database.
|
||||
write_sorter_into_database(
|
||||
prefix_integer_docids_sorter,
|
||||
cached_prefix_integer_docids_sorter.into_sorter()?,
|
||||
&self.prefix_database,
|
||||
self.wtxn,
|
||||
database_is_empty,
|
||||
@ -181,15 +180,13 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
||||
|
||||
fn write_prefixes_in_sorter(
|
||||
prefixes: &mut HashMap<Vec<u8>, Vec<Vec<u8>>>,
|
||||
sorter: &mut grenad::Sorter<MergeFn>,
|
||||
conn: &mut redis::Connection,
|
||||
sorter: &mut SorterCacheDelAddCboRoaringBitmap<20, MergeFn>,
|
||||
) -> Result<()> {
|
||||
// TODO: Merge before insertion.
|
||||
for (key, data_slices) in prefixes.drain() {
|
||||
for data in data_slices {
|
||||
if valid_lmdb_key(&key) {
|
||||
redis::cmd("INCR").arg(key.as_slice()).query::<usize>(conn).unwrap();
|
||||
sorter.insert(&key, data)?;
|
||||
sorter.direct_insert(&key, &data)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user