Use the bumpalo Bump instead of the RefBump/RefBytes

This commit is contained in:
Clément Renault 2024-10-24 17:47:41 +02:00
parent b7e106b34a
commit 2444ddbd3d
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F

View File

@ -1,17 +1,14 @@
use std::borrow::BorrowMut;
use std::fs::File; use std::fs::File;
use std::hash::BuildHasher; use std::hash::BuildHasher;
use std::io::{self, BufReader, BufWriter, Read as _, Seek, Write as _}; use std::io::{self, BufReader, BufWriter, Read as _, Seek, Write as _};
use std::vec; use std::vec;
use bumpalo::Bump;
use hashbrown::hash_map::RawEntryMut; use hashbrown::hash_map::RawEntryMut;
use hashbrown::{DefaultHashBuilder, HashMap}; use hashbrown::{DefaultHashBuilder, HashMap};
use raw_collections::alloc::{RefBump, RefBytes};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use tempfile::tempfile;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::new::indexer::document_changes::MostlySend;
use crate::CboRoaringBitmapCodec; use crate::CboRoaringBitmapCodec;
// # How the Merge Algorithm works // # How the Merge Algorithm works
@ -60,7 +57,7 @@ use crate::CboRoaringBitmapCodec;
pub struct CboCachedSorter<'extractor> { pub struct CboCachedSorter<'extractor> {
hasher: DefaultHashBuilder, hasher: DefaultHashBuilder,
alloc: RefBump<'extractor>, alloc: &'extractor Bump,
caches: InnerCaches<'extractor>, caches: InnerCaches<'extractor>,
} }
@ -70,14 +67,11 @@ enum InnerCaches<'extractor> {
} }
impl<'extractor> CboCachedSorter<'extractor> { impl<'extractor> CboCachedSorter<'extractor> {
pub fn new_in(buckets: usize, alloc: RefBump<'extractor>) -> Self { pub fn new_in(buckets: usize, alloc: &'extractor Bump) -> Self {
Self { Self {
hasher: DefaultHashBuilder::default(), hasher: DefaultHashBuilder::default(),
caches: InnerCaches::Normal(NormalCaches { caches: InnerCaches::Normal(NormalCaches {
caches: std::iter::repeat_with(|| RefBump::clone(&alloc)) caches: std::iter::repeat_with(|| HashMap::new_in(alloc)).take(buckets).collect(),
.map(HashMap::new_in)
.take(buckets)
.collect(),
}), }),
alloc, alloc,
} }
@ -122,16 +116,15 @@ impl<'extractor> CboCachedSorter<'extractor> {
} }
struct NormalCaches<'extractor> { struct NormalCaches<'extractor> {
caches: Vec< caches:
HashMap<RefBytes<'extractor>, DelAddRoaringBitmap, DefaultHashBuilder, RefBump<'extractor>>, Vec<HashMap<&'extractor [u8], DelAddRoaringBitmap, DefaultHashBuilder, &'extractor Bump>>,
>,
} }
impl<'extractor> NormalCaches<'extractor> { impl<'extractor> NormalCaches<'extractor> {
pub fn insert_del_u32( pub fn insert_del_u32(
&mut self, &mut self,
hasher: &DefaultHashBuilder, hasher: &DefaultHashBuilder,
alloc: &RefBump<'extractor>, alloc: &'extractor Bump,
buckets: usize, buckets: usize,
key: &[u8], key: &[u8],
n: u32, n: u32,
@ -139,16 +132,14 @@ impl<'extractor> NormalCaches<'extractor> {
let hash = compute_bytes_hash(hasher, key); let hash = compute_bytes_hash(hasher, key);
let bucket = compute_bucket_from_hash(buckets, hash); let bucket = compute_bucket_from_hash(buckets, hash);
match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k.as_ref() == key) { match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k == key) {
RawEntryMut::Occupied(mut entry) => { RawEntryMut::Occupied(mut entry) => {
entry.get_mut().del.get_or_insert_with(RoaringBitmap::default).insert(n); entry.get_mut().del.get_or_insert_with(RoaringBitmap::default).insert(n);
} }
RawEntryMut::Vacant(entry) => { RawEntryMut::Vacant(entry) => {
let alloc = RefBump::clone(&alloc);
let key = RefBump::map(alloc, |a| a.alloc_slice_copy(key));
entry.insert_hashed_nocheck( entry.insert_hashed_nocheck(
hash, hash,
RefBytes(key), alloc.alloc_slice_copy(key),
DelAddRoaringBitmap::new_del_u32(n), DelAddRoaringBitmap::new_del_u32(n),
); );
} }
@ -158,23 +149,21 @@ impl<'extractor> NormalCaches<'extractor> {
pub fn insert_add_u32( pub fn insert_add_u32(
&mut self, &mut self,
hasher: &DefaultHashBuilder, hasher: &DefaultHashBuilder,
alloc: &RefBump<'extractor>, alloc: &'extractor Bump,
buckets: usize, buckets: usize,
key: &[u8], key: &[u8],
n: u32, n: u32,
) { ) {
let hash = compute_bytes_hash(hasher, key); let hash = compute_bytes_hash(hasher, key);
let bucket = compute_bucket_from_hash(buckets, hash); let bucket = compute_bucket_from_hash(buckets, hash);
match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k.as_ref() == key) { match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k == key) {
RawEntryMut::Occupied(mut entry) => { RawEntryMut::Occupied(mut entry) => {
entry.get_mut().add.get_or_insert_with(RoaringBitmap::default).insert(n); entry.get_mut().add.get_or_insert_with(RoaringBitmap::default).insert(n);
} }
RawEntryMut::Vacant(entry) => { RawEntryMut::Vacant(entry) => {
let alloc = RefBump::clone(&alloc);
let key = RefBump::map(alloc, |a| a.alloc_slice_copy(key));
entry.insert_hashed_nocheck( entry.insert_hashed_nocheck(
hash, hash,
RefBytes(key), alloc.alloc_slice_copy(key),
DelAddRoaringBitmap::new_add_u32(n), DelAddRoaringBitmap::new_add_u32(n),
); );
} }
@ -183,9 +172,8 @@ impl<'extractor> NormalCaches<'extractor> {
} }
struct SpillingCaches<'extractor> { struct SpillingCaches<'extractor> {
caches: Vec< caches:
HashMap<RefBytes<'extractor>, DelAddRoaringBitmap, DefaultHashBuilder, RefBump<'extractor>>, Vec<HashMap<&'extractor [u8], DelAddRoaringBitmap, DefaultHashBuilder, &'extractor Bump>>,
>,
// TODO it must be a grenad Sorter with a DelAddCboRoaringBitmapCodec // TODO it must be a grenad Sorter with a DelAddCboRoaringBitmapCodec
spilled_entries: Vec<UnorderedEntries>, spilled_entries: Vec<UnorderedEntries>,
deladd_buffer: Vec<u8>, deladd_buffer: Vec<u8>,
@ -248,6 +236,7 @@ impl<'extractor> SpillingCaches<'extractor> {
} }
} }
#[inline]
fn compute_bytes_hash<S: BuildHasher>(hash_builder: &S, key: &[u8]) -> u64 { fn compute_bytes_hash<S: BuildHasher>(hash_builder: &S, key: &[u8]) -> u64 {
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
let mut state = hash_builder.build_hasher(); let mut state = hash_builder.build_hasher();
@ -255,6 +244,7 @@ fn compute_bytes_hash<S: BuildHasher>(hash_builder: &S, key: &[u8]) -> u64 {
state.finish() state.finish()
} }
#[inline]
fn compute_bucket_from_hash(buckets: usize, hash: u64) -> usize { fn compute_bucket_from_hash(buckets: usize, hash: u64) -> usize {
hash as usize % buckets hash as usize % buckets
} }