mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-10-30 01:38:49 +01:00
Finalize the cache freeze function
This commit is contained in:
parent
498f51c7b3
commit
9fcf51dcc6
@ -3,20 +3,21 @@ use std::collections::binary_heap::PeekMut;
|
|||||||
use std::collections::BinaryHeap;
|
use std::collections::BinaryHeap;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::hash::BuildHasher;
|
use std::hash::BuildHasher;
|
||||||
|
use std::io::BufReader;
|
||||||
use std::{io, iter, mem};
|
use std::{io, iter, mem};
|
||||||
|
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use grenad::ReaderCursor;
|
use grenad::{CompressionType, ReaderCursor};
|
||||||
use hashbrown::hash_map::RawEntryMut;
|
use hashbrown::hash_map::RawEntryMut;
|
||||||
use hashbrown::{DefaultHashBuilder, HashMap, HashSet};
|
use hashbrown::{DefaultHashBuilder, HashMap};
|
||||||
use raw_collections::map::FrozenMap;
|
use raw_collections::map::FrozenMap;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
|
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
|
||||||
use crate::update::new::indexer::document_changes::MostlySend;
|
use crate::update::new::indexer::document_changes::MostlySend;
|
||||||
use crate::update::new::KvReaderDelAdd;
|
use crate::update::new::KvReaderDelAdd;
|
||||||
use crate::update::MergeDeladdCboRoaringBitmaps;
|
use crate::update::{create_writer, writer_into_reader, MergeDeladdCboRoaringBitmaps};
|
||||||
use crate::CboRoaringBitmapCodec;
|
use crate::{CboRoaringBitmapCodec, Result};
|
||||||
|
|
||||||
// # How the Merge Algorithm works
|
// # How the Merge Algorithm works
|
||||||
//
|
//
|
||||||
@ -77,7 +78,7 @@ enum InnerCaches<'extractor> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'extractor> CboCachedSorter<'extractor> {
|
impl<'extractor> CboCachedSorter<'extractor> {
|
||||||
pub fn new_in(buckets: usize, alloc: &'extractor Bump) -> Self {
|
pub fn new_in(buckets: usize, max_memory: usize, alloc: &'extractor Bump) -> Self {
|
||||||
Self {
|
Self {
|
||||||
hasher: DefaultHashBuilder::default(),
|
hasher: DefaultHashBuilder::default(),
|
||||||
caches: InnerCaches::Normal(NormalCaches {
|
caches: InnerCaches::Normal(NormalCaches {
|
||||||
@ -132,8 +133,29 @@ impl<'extractor> CboCachedSorter<'extractor> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn freeze(&mut self) -> Vec<()> {
|
pub fn freeze(&mut self) -> Result<Vec<FrozenCache<'_, 'extractor>>> {
|
||||||
todo!()
|
match &mut self.caches {
|
||||||
|
InnerCaches::Normal(NormalCaches { caches }) => caches
|
||||||
|
.iter_mut()
|
||||||
|
.map(|map| {
|
||||||
|
let file = tempfile::tempfile()?;
|
||||||
|
let writer = create_writer(CompressionType::None, None, file);
|
||||||
|
let spilled = writer_into_reader(writer)?;
|
||||||
|
Ok(FrozenCache { cache: FrozenMap::new(map), spilled })
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
InnerCaches::Spilling(SpillingCaches { caches, spilled_entries, .. }) => caches
|
||||||
|
.iter_mut()
|
||||||
|
.zip(mem::take(spilled_entries))
|
||||||
|
.map(|(map, sorter)| {
|
||||||
|
let file = tempfile::tempfile()?;
|
||||||
|
let mut writer = create_writer(CompressionType::None, None, file);
|
||||||
|
sorter.write_into_stream_writer(&mut writer)?;
|
||||||
|
let spilled = writer_into_reader(writer)?;
|
||||||
|
Ok(FrozenCache { cache: FrozenMap::new(map), spilled })
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -328,17 +350,19 @@ fn spill_entry_to_disk(
|
|||||||
|
|
||||||
pub struct FrozenCache<'a, 'extractor> {
|
pub struct FrozenCache<'a, 'extractor> {
|
||||||
cache: FrozenMap<'a, 'extractor, &'extractor [u8], DelAddRoaringBitmap, DefaultHashBuilder>,
|
cache: FrozenMap<'a, 'extractor, &'extractor [u8], DelAddRoaringBitmap, DefaultHashBuilder>,
|
||||||
spilled: grenad::Reader<File>,
|
spilled: grenad::Reader<BufReader<File>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn merge_me<F>(frozen: Vec<FrozenCache>, mut iter: F) -> crate::Result<()>
|
/// Merges the caches, which must all be associated with the same bucket.
|
||||||
|
pub fn merge_caches<F>(frozen: Vec<FrozenCache>, mut iter: F) -> Result<()>
|
||||||
where
|
where
|
||||||
F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> crate::Result<()>,
|
F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>,
|
||||||
{
|
{
|
||||||
let (mut maps, spilled): (Vec<_>, Vec<_>) =
|
let (mut maps, spilled): (Vec<_>, Vec<_>) =
|
||||||
frozen.into_iter().map(|FrozenCache { cache, spilled }| (cache, spilled)).collect();
|
frozen.into_iter().map(|FrozenCache { cache, spilled }| (cache, spilled)).collect();
|
||||||
|
|
||||||
// First manage the spilled entries by looking into the HashMaps and then merge them.
|
// First manage the spilled entries by looking into the HashMaps,
|
||||||
|
// merge them and mark them as dummy.
|
||||||
let mut heap = BinaryHeap::new();
|
let mut heap = BinaryHeap::new();
|
||||||
for (source_index, source) in spilled.into_iter().enumerate() {
|
for (source_index, source) in spilled.into_iter().enumerate() {
|
||||||
let mut cursor = source.into_cursor()?;
|
let mut cursor = source.into_cursor()?;
|
||||||
@ -395,9 +419,27 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Then manage the content on the HashMap that weren't taken (mem::take).
|
// Then manage the content on the HashMap entries that weren't taken (mem::take).
|
||||||
|
while let Some(mut map) = maps.pop() {
|
||||||
|
for (key, output) in map.iter_mut() {
|
||||||
|
let mut output = mem::replace(output, DelAddRoaringBitmap::dummy());
|
||||||
|
|
||||||
todo!()
|
// Make sure we don't try to work with entries already managed by the spilled entries pass (those were replaced by dummies).
|
||||||
|
if !output.is_dummy() {
|
||||||
|
for rhs in maps.iter_mut() {
|
||||||
|
if let Some(new) = rhs.get_mut(key) {
|
||||||
|
let new = mem::replace(new, DelAddRoaringBitmap::dummy());
|
||||||
|
output.merge(new);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We send the merged entry outside.
|
||||||
|
(iter)(key, output)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Entry<R> {
|
struct Entry<R> {
|
||||||
@ -454,6 +496,11 @@ impl DelAddRoaringBitmap {
|
|||||||
DelAddRoaringBitmap { del: None, add: None }
|
DelAddRoaringBitmap { del: None, add: None }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_dummy(&self) -> bool {
|
||||||
|
let DelAddRoaringBitmap { del, add } = self;
|
||||||
|
del.is_none() && add.is_none()
|
||||||
|
}
|
||||||
|
|
||||||
fn new_del_add_u32(n: u32) -> Self {
|
fn new_del_add_u32(n: u32) -> Self {
|
||||||
DelAddRoaringBitmap {
|
DelAddRoaringBitmap {
|
||||||
del: Some(RoaringBitmap::from([n])),
|
del: Some(RoaringBitmap::from([n])),
|
||||||
|
Loading…
Reference in New Issue
Block a user