Mirror of https://github.com/meilisearch/MeiliSearch (synced 2024-10-30 01:38:49 +01:00)
Correctly spill into a grenad sorter
parent 287b1c51db
commit b602f73f0d
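The diff below reworks `write_entry` from a `&mut self` method into an associated function that receives the grenad `Sorter` and the two scratch buffers explicitly. That presumably lets `spill_to_disk` and `into_sorter` destructure `self`, consume the in-memory `cache` by value, and still spill every `DelAddRoaringBitmap` entry into the sorter without fighting the borrow checker. The following sketch only illustrates that ownership pattern; `Cache`, `Sorter`, `CachedSorter`, and the simplified `write_entry` signature are stand-ins, not the real milli or grenad types.

// Minimal sketch of the ownership pattern, with hypothetical simplified types.
use std::collections::BTreeMap;

type Cache = BTreeMap<Vec<u8>, u32>;

struct Sorter {
    rows: Vec<(Vec<u8>, Vec<u8>)>,
}

impl Sorter {
    fn insert(&mut self, key: Vec<u8>, value: Vec<u8>) {
        self.rows.push((key, value));
    }
}

struct CachedSorter {
    cache: Cache,
    sorter: Sorter,
    scratch: Vec<u8>,
}

impl CachedSorter {
    // Associated function instead of `&mut self`: it borrows only the pieces
    // it needs, so the caller is free to consume `cache` at the same time.
    fn write_entry(sorter: &mut Sorter, scratch: &mut Vec<u8>, key: Vec<u8>, value: u32) {
        scratch.clear();
        scratch.extend_from_slice(&value.to_le_bytes());
        sorter.insert(key, scratch.clone());
    }

    // Mirrors the shape of the new `spill_to_disk`/`into_sorter`: destructure
    // `self`, iterate the cache by value, and spill each entry into the sorter.
    fn into_sorter(self) -> Sorter {
        let Self { cache, mut sorter, mut scratch } = self;
        for (key, value) in cache {
            Self::write_entry(&mut sorter, &mut scratch, key, value);
        }
        sorter
    }
}

fn main() {
    let mut cached = CachedSorter {
        cache: Cache::new(),
        sorter: Sorter { rows: Vec::new() },
        scratch: Vec::new(),
    };
    cached.cache.insert(b"doc-1".to_vec(), 42);
    let sorter = cached.into_sorter();
    assert_eq!(sorter.rows.len(), 1);
}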
@@ -63,22 +63,6 @@ impl<'extractor, MF: MergeFunction> CboCachedSorter<'extractor, MF> {
         }
     }

-    pub fn insert_del(&mut self, key: &[u8], bitmap: RoaringBitmap) {
-        match self.cache.raw_entry_mut().from_key(key) {
-            RawEntryMut::Occupied(mut entry) => {
-                let DelAddRoaringBitmap { del, add: _ } = entry.get_mut();
-                *del.get_or_insert_with(RoaringBitmap::default) |= bitmap;
-            }
-            RawEntryMut::Vacant(entry) => {
-                self.total_insertions += 1;
-                self.fitted_in_key += (key.len() <= KEY_SIZE) as usize;
-                let alloc = RefBump::clone(&self.alloc);
-                let key = RefBump::map(alloc, |b| b.alloc_slice_copy(key));
-                entry.insert(RefBytes(key), DelAddRoaringBitmap::new_del(bitmap));
-            }
-        }
-    }
-
     pub fn insert_add_u32(&mut self, key: &[u8], n: u32) {
         match self.cache.raw_entry_mut().from_key(key) {
             RawEntryMut::Occupied(mut entry) => {
@@ -95,96 +79,69 @@ impl<'extractor, MF: MergeFunction> CboCachedSorter<'extractor, MF> {
         }
     }

-    pub fn insert_add(&mut self, key: &[u8], bitmap: RoaringBitmap) {
-        match self.cache.raw_entry_mut().from_key(key) {
-            RawEntryMut::Occupied(mut entry) => {
-                let DelAddRoaringBitmap { del: _, add } = entry.get_mut();
-                *add.get_or_insert_with(RoaringBitmap::default) |= bitmap;
-            }
-            RawEntryMut::Vacant(entry) => {
-                self.total_insertions += 1;
-                self.fitted_in_key += (key.len() <= KEY_SIZE) as usize;
-                let alloc = RefBump::clone(&self.alloc);
-                let key = RefBump::map(alloc, |b| b.alloc_slice_copy(key));
-                entry.insert(RefBytes(key), DelAddRoaringBitmap::new_add(bitmap));
-            }
-        }
-    }
-
-    pub fn insert_del_add_u32(&mut self, key: &[u8], n: u32) {
-        match self.cache.raw_entry_mut().from_key(key) {
-            RawEntryMut::Occupied(mut entry) => {
-                let DelAddRoaringBitmap { del, add } = entry.get_mut();
-                del.get_or_insert_with(RoaringBitmap::default).insert(n);
-                add.get_or_insert_with(RoaringBitmap::default).insert(n);
-            }
-            RawEntryMut::Vacant(entry) => {
-                self.total_insertions += 1;
-                self.fitted_in_key += (key.len() <= KEY_SIZE) as usize;
-                let alloc = RefBump::clone(&self.alloc);
-                let key = RefBump::map(alloc, |b| b.alloc_slice_copy(key));
-                entry.insert(RefBytes(key), DelAddRoaringBitmap::new_del_add_u32(n));
-            }
-        }
-    }
-
     fn write_entry<A: AsRef<[u8]>>(
-        &mut self,
+        sorter: &mut Sorter<MF>,
+        deladd_buffer: &mut Vec<u8>,
+        cbo_buffer: &mut Vec<u8>,
         key: A,
         deladd: DelAddRoaringBitmap,
     ) -> grenad::Result<(), MF::Error> {
-        /// TODO we must create a serialization trait to correctly serialize bitmaps
-        self.deladd_buffer.clear();
-        let mut value_writer = KvWriterDelAdd::new(&mut self.deladd_buffer);
+        deladd_buffer.clear();
+        let mut value_writer = KvWriterDelAdd::new(deladd_buffer);
         match deladd {
             DelAddRoaringBitmap { del: Some(del), add: None } => {
-                self.cbo_buffer.clear();
-                CboRoaringBitmapCodec::serialize_into(&del, &mut self.cbo_buffer);
-                value_writer.insert(DelAdd::Deletion, &self.cbo_buffer)?;
+                cbo_buffer.clear();
+                CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer);
+                value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;
             }
             DelAddRoaringBitmap { del: None, add: Some(add) } => {
-                self.cbo_buffer.clear();
-                CboRoaringBitmapCodec::serialize_into(&add, &mut self.cbo_buffer);
-                value_writer.insert(DelAdd::Addition, &self.cbo_buffer)?;
+                cbo_buffer.clear();
+                CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer);
+                value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
             }
             DelAddRoaringBitmap { del: Some(del), add: Some(add) } => {
-                self.cbo_buffer.clear();
-                CboRoaringBitmapCodec::serialize_into(&del, &mut self.cbo_buffer);
-                value_writer.insert(DelAdd::Deletion, &self.cbo_buffer)?;
+                cbo_buffer.clear();
+                CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer);
+                value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;

-                self.cbo_buffer.clear();
-                CboRoaringBitmapCodec::serialize_into(&add, &mut self.cbo_buffer);
-                value_writer.insert(DelAdd::Addition, &self.cbo_buffer)?;
+                cbo_buffer.clear();
+                CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer);
+                value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
             }
             DelAddRoaringBitmap { del: None, add: None } => return Ok(()),
         }
         let bytes = value_writer.into_inner().unwrap();
-        self.sorter.insert(key, bytes)
+        sorter.insert(key, bytes)
     }

-    pub fn direct_insert(&mut self, key: &[u8], val: &[u8]) -> grenad::Result<(), MF::Error> {
-        self.sorter.insert(key, val)
-    }
-
-    pub fn spill_to_disk(self) -> std::io::Result<SpilledCache<MF>> {
+    pub fn spill_to_disk(self) -> grenad::Result<SpilledCache<MF>, MF::Error> {
         let Self {
             cache,
             alloc: _,
-            sorter,
-            deladd_buffer,
-            cbo_buffer,
+            mut sorter,
+            mut deladd_buffer,
+            mut cbo_buffer,
             total_insertions,
             fitted_in_key,
         } = self;

-        /// I want to spill to disk for real
-        drop(cache);
+        for (key, deladd) in cache {
+            Self::write_entry(&mut sorter, &mut deladd_buffer, &mut cbo_buffer, key, deladd)?;
+        }

         Ok(SpilledCache { sorter, deladd_buffer, cbo_buffer, total_insertions, fitted_in_key })
     }

     pub fn into_sorter(self) -> grenad::Result<Sorter<MF>, MF::Error> {
-        let Self { cache, sorter, total_insertions, fitted_in_key, .. } = self;
+        let Self {
+            cache,
+            alloc: _,
+            mut sorter,
+            mut deladd_buffer,
+            mut cbo_buffer,
+            total_insertions,
+            fitted_in_key,
+        } = self;

         let mut all_n_containers = Vec::new();
         let mut all_n_array_containers = Vec::new();
@@ -214,8 +171,7 @@ impl<'extractor, MF: MergeFunction> CboCachedSorter<'extractor, MF> {
         }

         for (key, deladd) in cache {
-            // self.write_entry(key, deladd)?;
-            todo!("spill into the sorter")
+            Self::write_entry(&mut sorter, &mut deladd_buffer, &mut cbo_buffer, key, deladd)?;
         }

         let mut output = String::new();