mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-02 09:35:45 +01:00
Use the sorter cache when collection prefix docids
This commit is contained in:
parent
a3beaa90c5
commit
0e08906fcb
@ -140,6 +140,10 @@ where
|
|||||||
self.sorter.insert(key, value_writer.into_inner().unwrap())
|
self.sorter.insert(key, value_writer.into_inner().unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn direct_insert(&mut self, key: &[u8], val: &[u8]) -> Result<(), grenad::Error<U>> {
|
||||||
|
self.sorter.insert(key, val)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn into_sorter(mut self) -> Result<grenad::Sorter<MF>, grenad::Error<U>> {
|
pub fn into_sorter(mut self) -> Result<grenad::Sorter<MF>, grenad::Error<U>> {
|
||||||
let default_lru = LruCache::new(NonZeroUsize::MIN);
|
let default_lru = LruCache::new(NonZeroUsize::MIN);
|
||||||
for (key, deladd) in mem::replace(&mut self.cache, default_lru) {
|
for (key, deladd) in mem::replace(&mut self.cache, default_lru) {
|
||||||
@ -155,10 +159,6 @@ pub struct DelAddRoaringBitmap {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl DelAddRoaringBitmap {
|
impl DelAddRoaringBitmap {
|
||||||
fn new_del_add(bitmap: RoaringBitmap) -> Self {
|
|
||||||
DelAddRoaringBitmap { del: Some(bitmap.clone()), add: Some(bitmap) }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn new_del_add_u32(n: u32) -> Self {
|
fn new_del_add_u32(n: u32) -> Self {
|
||||||
DelAddRoaringBitmap {
|
DelAddRoaringBitmap {
|
||||||
del: Some(RoaringBitmap::from([n])),
|
del: Some(RoaringBitmap::from([n])),
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
mod cache;
|
pub mod cache;
|
||||||
mod enrich;
|
mod enrich;
|
||||||
mod extract;
|
mod extract;
|
||||||
mod helpers;
|
mod helpers;
|
||||||
|
@ -1,11 +1,13 @@
|
|||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
|
use std::num::NonZeroUsize;
|
||||||
|
|
||||||
use grenad::CompressionType;
|
use grenad::CompressionType;
|
||||||
use heed::types::{Bytes, Str};
|
use heed::types::Str;
|
||||||
use heed::Database;
|
use heed::Database;
|
||||||
|
|
||||||
|
use super::index_documents::cache::SorterCacheDelAddCboRoaringBitmap;
|
||||||
use super::index_documents::REDIS_CLIENT;
|
use super::index_documents::REDIS_CLIENT;
|
||||||
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvWriterDelAdd};
|
use crate::update::del_add::deladd_serialize_add_side;
|
||||||
use crate::update::index_documents::{
|
use crate::update::index_documents::{
|
||||||
create_sorter, merge_deladd_cbo_roaring_bitmaps,
|
create_sorter, merge_deladd_cbo_roaring_bitmaps,
|
||||||
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
|
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
|
||||||
@ -53,11 +55,9 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
|
|||||||
common_prefix_fst_words: &[&[String]],
|
common_prefix_fst_words: &[&[String]],
|
||||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut conn = REDIS_CLIENT.get_connection().unwrap();
|
|
||||||
|
|
||||||
// It is forbidden to keep a mutable reference into the database
|
// It is forbidden to keep a mutable reference into the database
|
||||||
// and write into it at the same time, therefore we write into another file.
|
// and write into it at the same time, therefore we write into another file.
|
||||||
let mut prefix_docids_sorter = create_sorter(
|
let prefix_docids_sorter = create_sorter(
|
||||||
grenad::SortAlgorithm::Unstable,
|
grenad::SortAlgorithm::Unstable,
|
||||||
merge_deladd_cbo_roaring_bitmaps,
|
merge_deladd_cbo_roaring_bitmaps,
|
||||||
self.chunk_compression_type,
|
self.chunk_compression_type,
|
||||||
@ -65,6 +65,11 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
|
|||||||
self.max_nb_chunks,
|
self.max_nb_chunks,
|
||||||
self.max_memory,
|
self.max_memory,
|
||||||
);
|
);
|
||||||
|
let mut cached_prefix_docids_sorter = SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
|
||||||
|
NonZeroUsize::new(200).unwrap(),
|
||||||
|
prefix_docids_sorter,
|
||||||
|
REDIS_CLIENT.get_connection().unwrap(),
|
||||||
|
);
|
||||||
|
|
||||||
if !common_prefix_fst_words.is_empty() {
|
if !common_prefix_fst_words.is_empty() {
|
||||||
let mut current_prefixes: Option<&&[String]> = None;
|
let mut current_prefixes: Option<&&[String]> = None;
|
||||||
@ -76,8 +81,7 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
|
|||||||
_otherwise => {
|
_otherwise => {
|
||||||
write_prefixes_in_sorter(
|
write_prefixes_in_sorter(
|
||||||
&mut prefixes_cache,
|
&mut prefixes_cache,
|
||||||
&mut prefix_docids_sorter,
|
&mut cached_prefix_docids_sorter,
|
||||||
&mut conn,
|
|
||||||
)?;
|
)?;
|
||||||
common_prefix_fst_words
|
common_prefix_fst_words
|
||||||
.iter()
|
.iter()
|
||||||
@ -100,22 +104,17 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
write_prefixes_in_sorter(&mut prefixes_cache, &mut prefix_docids_sorter, &mut conn)?;
|
write_prefixes_in_sorter(&mut prefixes_cache, &mut cached_prefix_docids_sorter)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We fetch the docids associated to the newly added word prefix fst only.
|
// We fetch the docids associated to the newly added word prefix fst only.
|
||||||
let db = self.word_docids.remap_data_type::<Bytes>();
|
let db = self.word_docids.lazily_decode_data();
|
||||||
let mut buffer = Vec::new();
|
|
||||||
for prefix in new_prefix_fst_words {
|
for prefix in new_prefix_fst_words {
|
||||||
let prefix = std::str::from_utf8(prefix.as_bytes())?;
|
let prefix = std::str::from_utf8(prefix.as_bytes())?;
|
||||||
for result in db.prefix_iter(self.wtxn, prefix)? {
|
for result in db.prefix_iter(self.wtxn, prefix)? {
|
||||||
let (_word, data) = result?;
|
let (_word, lazy_data) = result?;
|
||||||
buffer.clear();
|
cached_prefix_docids_sorter
|
||||||
let mut writer = KvWriterDelAdd::new(&mut buffer);
|
.insert_add(prefix.as_bytes(), lazy_data.decode().unwrap())?;
|
||||||
writer.insert(DelAdd::Addition, data)?;
|
|
||||||
|
|
||||||
redis::cmd("INCR").arg(prefix.as_bytes()).query::<usize>(&mut conn).unwrap();
|
|
||||||
prefix_docids_sorter.insert(prefix, writer.into_inner()?)?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -133,7 +132,7 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
|
|||||||
|
|
||||||
// We finally write the word prefix docids into the LMDB database.
|
// We finally write the word prefix docids into the LMDB database.
|
||||||
write_sorter_into_database(
|
write_sorter_into_database(
|
||||||
prefix_docids_sorter,
|
cached_prefix_docids_sorter.into_sorter()?,
|
||||||
&self.word_prefix_docids,
|
&self.word_prefix_docids,
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
database_is_empty,
|
database_is_empty,
|
||||||
@ -147,14 +146,12 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
|
|||||||
|
|
||||||
fn write_prefixes_in_sorter(
|
fn write_prefixes_in_sorter(
|
||||||
prefixes: &mut HashMap<Vec<u8>, Vec<Vec<u8>>>,
|
prefixes: &mut HashMap<Vec<u8>, Vec<Vec<u8>>>,
|
||||||
sorter: &mut grenad::Sorter<MergeFn>,
|
sorter: &mut SorterCacheDelAddCboRoaringBitmap<20, MergeFn>,
|
||||||
conn: &mut redis::Connection,
|
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
for (key, data_slices) in prefixes.drain() {
|
for (key, data_slices) in prefixes.drain() {
|
||||||
for data in data_slices {
|
for data in data_slices {
|
||||||
if valid_lmdb_key(&key) {
|
if valid_lmdb_key(&key) {
|
||||||
redis::cmd("INCR").arg(key.as_slice()).query::<usize>(conn).unwrap();
|
sorter.direct_insert(&key, &data)?;
|
||||||
sorter.insert(&key, data)?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
|
use std::num::NonZeroUsize;
|
||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
use grenad::CompressionType;
|
use grenad::CompressionType;
|
||||||
@ -9,7 +10,8 @@ use tracing::debug;
|
|||||||
use crate::error::SerializationError;
|
use crate::error::SerializationError;
|
||||||
use crate::heed_codec::StrBEU16Codec;
|
use crate::heed_codec::StrBEU16Codec;
|
||||||
use crate::index::main_key::WORDS_PREFIXES_FST_KEY;
|
use crate::index::main_key::WORDS_PREFIXES_FST_KEY;
|
||||||
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvWriterDelAdd};
|
use crate::update::del_add::deladd_serialize_add_side;
|
||||||
|
use crate::update::index_documents::cache::SorterCacheDelAddCboRoaringBitmap;
|
||||||
use crate::update::index_documents::{
|
use crate::update::index_documents::{
|
||||||
create_sorter, merge_deladd_cbo_roaring_bitmaps,
|
create_sorter, merge_deladd_cbo_roaring_bitmaps,
|
||||||
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
|
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
|
||||||
@ -59,9 +61,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
|||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
debug!("Computing and writing the word levels integers docids into LMDB on disk...");
|
debug!("Computing and writing the word levels integers docids into LMDB on disk...");
|
||||||
|
|
||||||
let mut conn = REDIS_CLIENT.get_connection().unwrap();
|
let prefix_integer_docids_sorter = create_sorter(
|
||||||
|
|
||||||
let mut prefix_integer_docids_sorter = create_sorter(
|
|
||||||
grenad::SortAlgorithm::Unstable,
|
grenad::SortAlgorithm::Unstable,
|
||||||
merge_deladd_cbo_roaring_bitmaps,
|
merge_deladd_cbo_roaring_bitmaps,
|
||||||
self.chunk_compression_type,
|
self.chunk_compression_type,
|
||||||
@ -69,6 +69,12 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
|||||||
self.max_nb_chunks,
|
self.max_nb_chunks,
|
||||||
self.max_memory,
|
self.max_memory,
|
||||||
);
|
);
|
||||||
|
let mut cached_prefix_integer_docids_sorter =
|
||||||
|
SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
|
||||||
|
NonZeroUsize::new(200).unwrap(),
|
||||||
|
prefix_integer_docids_sorter,
|
||||||
|
REDIS_CLIENT.get_connection().unwrap(),
|
||||||
|
);
|
||||||
|
|
||||||
if !common_prefix_fst_words.is_empty() {
|
if !common_prefix_fst_words.is_empty() {
|
||||||
// We fetch all the new common prefixes between the previous and new prefix fst.
|
// We fetch all the new common prefixes between the previous and new prefix fst.
|
||||||
@ -86,8 +92,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
|||||||
_otherwise => {
|
_otherwise => {
|
||||||
write_prefixes_in_sorter(
|
write_prefixes_in_sorter(
|
||||||
&mut prefixes_cache,
|
&mut prefixes_cache,
|
||||||
&mut prefix_integer_docids_sorter,
|
&mut cached_prefix_integer_docids_sorter,
|
||||||
&mut conn,
|
|
||||||
)?;
|
)?;
|
||||||
common_prefix_fst_words
|
common_prefix_fst_words
|
||||||
.iter()
|
.iter()
|
||||||
@ -115,14 +120,12 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
|||||||
|
|
||||||
write_prefixes_in_sorter(
|
write_prefixes_in_sorter(
|
||||||
&mut prefixes_cache,
|
&mut prefixes_cache,
|
||||||
&mut prefix_integer_docids_sorter,
|
&mut cached_prefix_integer_docids_sorter,
|
||||||
&mut conn,
|
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We fetch the docids associated to the newly added word prefix fst only.
|
// We fetch the docids associated to the newly added word prefix fst only.
|
||||||
let db = self.word_database.remap_data_type::<Bytes>();
|
let db = self.word_database.lazily_decode_data();
|
||||||
let mut buffer = Vec::new();
|
|
||||||
for prefix_bytes in new_prefix_fst_words {
|
for prefix_bytes in new_prefix_fst_words {
|
||||||
let prefix = str::from_utf8(prefix_bytes.as_bytes()).map_err(|_| {
|
let prefix = str::from_utf8(prefix_bytes.as_bytes()).map_err(|_| {
|
||||||
SerializationError::Decoding { db_name: Some(WORDS_PREFIXES_FST_KEY) }
|
SerializationError::Decoding { db_name: Some(WORDS_PREFIXES_FST_KEY) }
|
||||||
@ -134,16 +137,12 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
|||||||
.prefix_iter(self.wtxn, prefix_bytes.as_bytes())?
|
.prefix_iter(self.wtxn, prefix_bytes.as_bytes())?
|
||||||
.remap_key_type::<StrBEU16Codec>();
|
.remap_key_type::<StrBEU16Codec>();
|
||||||
for result in iter {
|
for result in iter {
|
||||||
let ((word, pos), data) = result?;
|
let ((word, pos), lazy_data) = result?;
|
||||||
if word.starts_with(prefix) {
|
if word.starts_with(prefix) {
|
||||||
let key = (prefix, pos);
|
let key = (prefix, pos);
|
||||||
let bytes = StrBEU16Codec::bytes_encode(&key).unwrap();
|
let bytes = StrBEU16Codec::bytes_encode(&key).unwrap();
|
||||||
|
cached_prefix_integer_docids_sorter
|
||||||
buffer.clear();
|
.insert_add(&bytes, lazy_data.decode().unwrap())?;
|
||||||
let mut writer = KvWriterDelAdd::new(&mut buffer);
|
|
||||||
writer.insert(DelAdd::Addition, data)?;
|
|
||||||
redis::cmd("INCR").arg(bytes.as_ref()).query::<usize>(&mut conn).unwrap();
|
|
||||||
prefix_integer_docids_sorter.insert(bytes, writer.into_inner()?)?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -167,7 +166,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
|||||||
|
|
||||||
// We finally write all the word prefix integer docids into the LMDB database.
|
// We finally write all the word prefix integer docids into the LMDB database.
|
||||||
write_sorter_into_database(
|
write_sorter_into_database(
|
||||||
prefix_integer_docids_sorter,
|
cached_prefix_integer_docids_sorter.into_sorter()?,
|
||||||
&self.prefix_database,
|
&self.prefix_database,
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
database_is_empty,
|
database_is_empty,
|
||||||
@ -181,15 +180,13 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
|||||||
|
|
||||||
fn write_prefixes_in_sorter(
|
fn write_prefixes_in_sorter(
|
||||||
prefixes: &mut HashMap<Vec<u8>, Vec<Vec<u8>>>,
|
prefixes: &mut HashMap<Vec<u8>, Vec<Vec<u8>>>,
|
||||||
sorter: &mut grenad::Sorter<MergeFn>,
|
sorter: &mut SorterCacheDelAddCboRoaringBitmap<20, MergeFn>,
|
||||||
conn: &mut redis::Connection,
|
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// TODO: Merge before insertion.
|
// TODO: Merge before insertion.
|
||||||
for (key, data_slices) in prefixes.drain() {
|
for (key, data_slices) in prefixes.drain() {
|
||||||
for data in data_slices {
|
for data in data_slices {
|
||||||
if valid_lmdb_key(&key) {
|
if valid_lmdb_key(&key) {
|
||||||
redis::cmd("INCR").arg(key.as_slice()).query::<usize>(conn).unwrap();
|
sorter.direct_insert(&key, &data)?;
|
||||||
sorter.insert(&key, data)?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user