mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 08:44:27 +01:00
Introduce a new Sorter Cache for CboRoaringBitmaps
This commit is contained in:
parent
eafc097a85
commit
5d5769fd8a
10
Cargo.lock
generated
10
Cargo.lock
generated
@ -3290,6 +3290,15 @@ version = "0.4.21"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
|
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lru"
|
||||||
|
version = "0.12.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d3262e75e648fce39813cb56ac41f3c3e3f65217ebf3844d818d1f9398cfb0dc"
|
||||||
|
dependencies = [
|
||||||
|
"hashbrown 0.14.3",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lzma-rs"
|
name = "lzma-rs"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
@ -3550,6 +3559,7 @@ dependencies = [
|
|||||||
"json-depth-checker",
|
"json-depth-checker",
|
||||||
"levenshtein_automata",
|
"levenshtein_automata",
|
||||||
"liquid",
|
"liquid",
|
||||||
|
"lru",
|
||||||
"maplit",
|
"maplit",
|
||||||
"md5",
|
"md5",
|
||||||
"meili-snap",
|
"meili-snap",
|
||||||
|
@ -50,7 +50,7 @@ serde = { version = "1.0.204", features = ["derive"] }
|
|||||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||||
slice-group-by = "0.3.1"
|
slice-group-by = "0.3.1"
|
||||||
smallstr = { version = "0.3.0", features = ["serde"] }
|
smallstr = { version = "0.3.0", features = ["serde"] }
|
||||||
smallvec = "1.13.2"
|
smallvec = { version = "1.13.2", features = ["union"] }
|
||||||
smartstring = "1.0.1"
|
smartstring = "1.0.1"
|
||||||
tempfile = "3.10.1"
|
tempfile = "3.10.1"
|
||||||
thiserror = "1.0.61"
|
thiserror = "1.0.61"
|
||||||
@ -88,6 +88,7 @@ tracing = "0.1.40"
|
|||||||
ureq = { version = "2.10.0", features = ["json"] }
|
ureq = { version = "2.10.0", features = ["json"] }
|
||||||
url = "2.5.2"
|
url = "2.5.2"
|
||||||
rayon-par-bridge = "0.1.0"
|
rayon-par-bridge = "0.1.0"
|
||||||
|
lru = "0.12.3"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
mimalloc = { version = "0.1.43", default-features = false }
|
mimalloc = { version = "0.1.43", default-features = false }
|
||||||
|
138
milli/src/update/index_documents/cache.rs
Normal file
138
milli/src/update/index_documents/cache.rs
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
|
use std::mem;
|
||||||
|
use std::num::NonZeroUsize;
|
||||||
|
|
||||||
|
use lru::LruCache;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
use smallvec::SmallVec;
|
||||||
|
|
||||||
|
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
|
||||||
|
use crate::CboRoaringBitmapCodec;
|
||||||
|
|
||||||
|
pub struct SorterCacheDelAddCboRoaringBitmap<const N: usize, MF> {
|
||||||
|
cache: LruCache<SmallVec<[u8; N]>, DelAddRoaringBitmap>,
|
||||||
|
sorter: grenad::Sorter<MF>,
|
||||||
|
deladd_buffer: Vec<u8>,
|
||||||
|
cbo_buffer: Vec<u8>,
|
||||||
|
conn: redis::Connection,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const N: usize, MF> SorterCacheDelAddCboRoaringBitmap<N, MF> {
|
||||||
|
pub fn new(cap: NonZeroUsize, sorter: grenad::Sorter<MF>, conn: redis::Connection) -> Self {
|
||||||
|
SorterCacheDelAddCboRoaringBitmap {
|
||||||
|
cache: LruCache::new(cap),
|
||||||
|
sorter,
|
||||||
|
deladd_buffer: Vec::new(),
|
||||||
|
cbo_buffer: Vec::new(),
|
||||||
|
conn,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const N: usize, MF, U> SorterCacheDelAddCboRoaringBitmap<N, MF>
|
||||||
|
where
|
||||||
|
MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>, U>,
|
||||||
|
{
|
||||||
|
pub fn insert_del_u32(&mut self, key: &[u8], n: u32) -> Result<(), grenad::Error<U>> {
|
||||||
|
match self.cache.get_mut(key) {
|
||||||
|
Some(DelAddRoaringBitmap { del, add: _ }) => {
|
||||||
|
del.get_or_insert_with(RoaringBitmap::new).insert(n);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
None => match self.cache.push(key.into(), DelAddRoaringBitmap::new_del(n)) {
|
||||||
|
Some((key, deladd)) => self.write_entry_to_sorter(key, deladd),
|
||||||
|
None => Ok(()),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn insert_add_u32(&mut self, key: &[u8], n: u32) -> Result<(), grenad::Error<U>> {
|
||||||
|
match self.cache.get_mut(key) {
|
||||||
|
Some(DelAddRoaringBitmap { del: _, add }) => {
|
||||||
|
add.get_or_insert_with(RoaringBitmap::new).insert(n);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
None => match self.cache.push(key.into(), DelAddRoaringBitmap::new_add(n)) {
|
||||||
|
Some((key, deladd)) => self.write_entry_to_sorter(key, deladd),
|
||||||
|
None => Ok(()),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn insert_del_add_u32(&mut self, key: &[u8], n: u32) -> Result<(), grenad::Error<U>> {
|
||||||
|
match self.cache.get_mut(key) {
|
||||||
|
Some(DelAddRoaringBitmap { del, add }) => {
|
||||||
|
del.get_or_insert_with(RoaringBitmap::new).insert(n);
|
||||||
|
add.get_or_insert_with(RoaringBitmap::new).insert(n);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
None => match self.cache.push(key.into(), DelAddRoaringBitmap::new_del_add(n)) {
|
||||||
|
Some((key, deladd)) => self.write_entry_to_sorter(key, deladd),
|
||||||
|
None => Ok(()),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_entry_to_sorter(
|
||||||
|
&mut self,
|
||||||
|
key: SmallVec<[u8; N]>,
|
||||||
|
deladd: DelAddRoaringBitmap,
|
||||||
|
) -> Result<(), grenad::Error<U>> {
|
||||||
|
self.deladd_buffer.clear();
|
||||||
|
let mut value_writer = KvWriterDelAdd::new(&mut self.deladd_buffer);
|
||||||
|
match deladd {
|
||||||
|
DelAddRoaringBitmap { del: Some(del), add: None } => {
|
||||||
|
self.cbo_buffer.clear();
|
||||||
|
CboRoaringBitmapCodec::serialize_into(&del, &mut self.cbo_buffer);
|
||||||
|
value_writer.insert(DelAdd::Deletion, &self.cbo_buffer)?;
|
||||||
|
}
|
||||||
|
DelAddRoaringBitmap { del: None, add: Some(add) } => {
|
||||||
|
self.cbo_buffer.clear();
|
||||||
|
CboRoaringBitmapCodec::serialize_into(&add, &mut self.cbo_buffer);
|
||||||
|
value_writer.insert(DelAdd::Addition, &self.cbo_buffer)?;
|
||||||
|
}
|
||||||
|
DelAddRoaringBitmap { del: Some(del), add: Some(add) } => {
|
||||||
|
self.cbo_buffer.clear();
|
||||||
|
CboRoaringBitmapCodec::serialize_into(&del, &mut self.cbo_buffer);
|
||||||
|
value_writer.insert(DelAdd::Deletion, &self.cbo_buffer)?;
|
||||||
|
|
||||||
|
self.cbo_buffer.clear();
|
||||||
|
CboRoaringBitmapCodec::serialize_into(&add, &mut self.cbo_buffer);
|
||||||
|
value_writer.insert(DelAdd::Addition, &self.cbo_buffer)?;
|
||||||
|
}
|
||||||
|
DelAddRoaringBitmap { del: None, add: None } => return Ok(()),
|
||||||
|
}
|
||||||
|
redis::cmd("INCR").arg(key.as_ref()).query::<usize>(&mut self.conn).unwrap();
|
||||||
|
self.sorter.insert(key, value_writer.into_inner().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn into_sorter(mut self) -> Result<grenad::Sorter<MF>, grenad::Error<U>> {
|
||||||
|
let default_lru = LruCache::new(NonZeroUsize::MIN);
|
||||||
|
for (key, deladd) in mem::replace(&mut self.cache, default_lru) {
|
||||||
|
self.write_entry_to_sorter(key, deladd)?;
|
||||||
|
}
|
||||||
|
Ok(self.sorter)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct DelAddRoaringBitmap {
|
||||||
|
pub del: Option<RoaringBitmap>,
|
||||||
|
pub add: Option<RoaringBitmap>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DelAddRoaringBitmap {
|
||||||
|
fn new_del_add(n: u32) -> Self {
|
||||||
|
DelAddRoaringBitmap {
|
||||||
|
del: Some(RoaringBitmap::from([n])),
|
||||||
|
add: Some(RoaringBitmap::from([n])),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new_del(n: u32) -> Self {
|
||||||
|
DelAddRoaringBitmap { del: Some(RoaringBitmap::from([n])), add: None }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new_add(n: u32) -> Self {
|
||||||
|
DelAddRoaringBitmap { del: None, add: Some(RoaringBitmap::from([n])) }
|
||||||
|
}
|
||||||
|
}
|
@ -1,3 +1,4 @@
|
|||||||
|
mod cache;
|
||||||
mod enrich;
|
mod enrich;
|
||||||
mod extract;
|
mod extract;
|
||||||
mod helpers;
|
mod helpers;
|
||||||
|
Loading…
Reference in New Issue
Block a user