Use the bumpalo Bump instead of the RefBump/RefBytes

This commit is contained in:
Clément Renault 2024-10-24 17:47:41 +02:00
parent b7e106b34a
commit 2444ddbd3d
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F

View File

@ -1,17 +1,14 @@
use std::borrow::BorrowMut;
use std::fs::File;
use std::hash::BuildHasher;
use std::io::{self, BufReader, BufWriter, Read as _, Seek, Write as _};
use std::vec;
use bumpalo::Bump;
use hashbrown::hash_map::RawEntryMut;
use hashbrown::{DefaultHashBuilder, HashMap};
use raw_collections::alloc::{RefBump, RefBytes};
use roaring::RoaringBitmap;
use tempfile::tempfile;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::new::indexer::document_changes::MostlySend;
use crate::CboRoaringBitmapCodec;
// # How the Merge Algorithm works
@ -60,7 +57,7 @@ use crate::CboRoaringBitmapCodec;
/// Cache front-end that bundles a hash builder, a bump allocator handle and
/// the bucketed caches built from them (see `new_in`).
// NOTE(review): this view is a rendered diff, so both declarations of `alloc`
// appear below; per the commit title the `&'extractor Bump` form replaces the
// `RefBump<'extractor>` one — confirm against the real file.
pub struct CboCachedSorter<'extractor> {
/// Hash builder; `new_in` initialises it with `DefaultHashBuilder::default()`.
hasher: DefaultHashBuilder,
// `alloc` as a `RefBump` handle.
alloc: RefBump<'extractor>,
// `alloc` as a plain shared reference to the caller-supplied `Bump`.
alloc: &'extractor Bump,
/// Bucketed caches; `new_in` starts them in the `InnerCaches::Normal` state.
caches: InnerCaches<'extractor>,
}
@ -70,14 +67,11 @@ enum InnerCaches<'extractor> {
}
impl<'extractor> CboCachedSorter<'extractor> {
pub fn new_in(buckets: usize, alloc: RefBump<'extractor>) -> Self {
pub fn new_in(buckets: usize, alloc: &'extractor Bump) -> Self {
Self {
hasher: DefaultHashBuilder::default(),
caches: InnerCaches::Normal(NormalCaches {
caches: std::iter::repeat_with(|| RefBump::clone(&alloc))
.map(HashMap::new_in)
.take(buckets)
.collect(),
caches: std::iter::repeat_with(|| HashMap::new_in(alloc)).take(buckets).collect(),
}),
alloc,
}
@ -122,16 +116,15 @@ impl<'extractor> CboCachedSorter<'extractor> {
}
/// In-memory caches: a vector of hash maps from a byte-slice key to a
/// `DelAddRoaringBitmap`, each map using the bump allocator as its allocator.
/// `new_in` builds `buckets` of them, and the `insert_*_u32` methods select one
/// through `compute_bucket_from_hash`.
// NOTE(review): this view is a rendered diff, so the `caches` field appears in
// both its `RefBytes`/`RefBump` form and its `&[u8]`/`&Bump` form — only one
// of the two exists in the real file.
struct NormalCaches<'extractor> {
// `caches` keyed by `RefBytes` and allocated through `RefBump`.
caches: Vec<
HashMap<RefBytes<'extractor>, DelAddRoaringBitmap, DefaultHashBuilder, RefBump<'extractor>>,
>,
// `caches` keyed by `&'extractor [u8]`; the insert methods produce such keys
// by copying the caller's key with `alloc.alloc_slice_copy(key)`.
caches:
Vec<HashMap<&'extractor [u8], DelAddRoaringBitmap, DefaultHashBuilder, &'extractor Bump>>,
}
impl<'extractor> NormalCaches<'extractor> {
pub fn insert_del_u32(
&mut self,
hasher: &DefaultHashBuilder,
alloc: &RefBump<'extractor>,
alloc: &'extractor Bump,
buckets: usize,
key: &[u8],
n: u32,
@ -139,16 +132,14 @@ impl<'extractor> NormalCaches<'extractor> {
let hash = compute_bytes_hash(hasher, key);
let bucket = compute_bucket_from_hash(buckets, hash);
match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k.as_ref() == key) {
match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k == key) {
RawEntryMut::Occupied(mut entry) => {
entry.get_mut().del.get_or_insert_with(RoaringBitmap::default).insert(n);
}
RawEntryMut::Vacant(entry) => {
let alloc = RefBump::clone(&alloc);
let key = RefBump::map(alloc, |a| a.alloc_slice_copy(key));
entry.insert_hashed_nocheck(
hash,
RefBytes(key),
alloc.alloc_slice_copy(key),
DelAddRoaringBitmap::new_del_u32(n),
);
}
@ -158,23 +149,21 @@ impl<'extractor> NormalCaches<'extractor> {
pub fn insert_add_u32(
&mut self,
hasher: &DefaultHashBuilder,
alloc: &RefBump<'extractor>,
alloc: &'extractor Bump,
buckets: usize,
key: &[u8],
n: u32,
) {
let hash = compute_bytes_hash(hasher, key);
let bucket = compute_bucket_from_hash(buckets, hash);
match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k.as_ref() == key) {
match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k == key) {
RawEntryMut::Occupied(mut entry) => {
entry.get_mut().add.get_or_insert_with(RoaringBitmap::default).insert(n);
}
RawEntryMut::Vacant(entry) => {
let alloc = RefBump::clone(&alloc);
let key = RefBump::map(alloc, |a| a.alloc_slice_copy(key));
entry.insert_hashed_nocheck(
hash,
RefBytes(key),
alloc.alloc_slice_copy(key),
DelAddRoaringBitmap::new_add_u32(n),
);
}
@ -183,9 +172,8 @@ impl<'extractor> NormalCaches<'extractor> {
}
struct SpillingCaches<'extractor> {
caches: Vec<
HashMap<RefBytes<'extractor>, DelAddRoaringBitmap, DefaultHashBuilder, RefBump<'extractor>>,
>,
caches:
Vec<HashMap<&'extractor [u8], DelAddRoaringBitmap, DefaultHashBuilder, &'extractor Bump>>,
// TODO it must be a grenad Sorter with a DelAddCboRoaringBitmapCodec
spilled_entries: Vec<UnorderedEntries>,
deladd_buffer: Vec<u8>,
@ -248,6 +236,7 @@ impl<'extractor> SpillingCaches<'extractor> {
}
}
#[inline]
fn compute_bytes_hash<S: BuildHasher>(hash_builder: &S, key: &[u8]) -> u64 {
use std::hash::{Hash, Hasher};
let mut state = hash_builder.build_hasher();
@ -255,6 +244,7 @@ fn compute_bytes_hash<S: BuildHasher>(hash_builder: &S, key: &[u8]) -> u64 {
state.finish()
}
/// Picks the cache bucket that a hashed key falls into.
///
/// The 64-bit `hash` is cast to `usize` and reduced modulo `buckets`, so the
/// returned index is always in `0..buckets`.
///
/// # Panics
///
/// Panics if `buckets` is zero (remainder by zero).
#[inline]
fn compute_bucket_from_hash(buckets: usize, hash: u64) -> usize {
    // Cast first, then reduce: on targets where `usize` is narrower than
    // 64 bits only the low bits of the hash take part in the selection.
    let truncated = hash as usize;
    truncated % buckets
}