From 2444ddbd3d11a68b3e71f1f1499d3ef70aee3c47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 24 Oct 2024 17:47:41 +0200 Subject: [PATCH] Use the bumpalo Bump instead of the RefBump/RefBytes --- milli/src/update/new/extract/cache.rs | 42 ++++++++++----------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/milli/src/update/new/extract/cache.rs b/milli/src/update/new/extract/cache.rs index 7152ec727..5a4bc6b92 100644 --- a/milli/src/update/new/extract/cache.rs +++ b/milli/src/update/new/extract/cache.rs @@ -1,17 +1,14 @@ -use std::borrow::BorrowMut; use std::fs::File; use std::hash::BuildHasher; use std::io::{self, BufReader, BufWriter, Read as _, Seek, Write as _}; use std::vec; +use bumpalo::Bump; use hashbrown::hash_map::RawEntryMut; use hashbrown::{DefaultHashBuilder, HashMap}; -use raw_collections::alloc::{RefBump, RefBytes}; use roaring::RoaringBitmap; -use tempfile::tempfile; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; -use crate::update::new::indexer::document_changes::MostlySend; use crate::CboRoaringBitmapCodec; // # How the Merge Algorithm works @@ -60,7 +57,7 @@ use crate::CboRoaringBitmapCodec; pub struct CboCachedSorter<'extractor> { hasher: DefaultHashBuilder, - alloc: RefBump<'extractor>, + alloc: &'extractor Bump, caches: InnerCaches<'extractor>, } @@ -70,14 +67,11 @@ enum InnerCaches<'extractor> { } impl<'extractor> CboCachedSorter<'extractor> { - pub fn new_in(buckets: usize, alloc: RefBump<'extractor>) -> Self { + pub fn new_in(buckets: usize, alloc: &'extractor Bump) -> Self { Self { hasher: DefaultHashBuilder::default(), caches: InnerCaches::Normal(NormalCaches { - caches: std::iter::repeat_with(|| RefBump::clone(&alloc)) - .map(HashMap::new_in) - .take(buckets) - .collect(), + caches: std::iter::repeat_with(|| HashMap::new_in(alloc)).take(buckets).collect(), }), alloc, } @@ -122,16 +116,15 @@ impl<'extractor> CboCachedSorter<'extractor> { } struct NormalCaches<'extractor> { - caches: Vec< - HashMap, DelAddRoaringBitmap, DefaultHashBuilder, RefBump<'extractor>>, - >, + caches: + Vec>, } impl<'extractor> NormalCaches<'extractor> { pub fn insert_del_u32( &mut self, hasher: &DefaultHashBuilder, - alloc: &RefBump<'extractor>, + alloc: &'extractor Bump, buckets: usize, key: &[u8], n: u32, @@ -139,16 +132,14 @@ impl<'extractor> NormalCaches<'extractor> { let hash = compute_bytes_hash(hasher, key); let bucket = compute_bucket_from_hash(buckets, hash); - match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k.as_ref() == key) { + match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k == key) { RawEntryMut::Occupied(mut entry) => { entry.get_mut().del.get_or_insert_with(RoaringBitmap::default).insert(n); } RawEntryMut::Vacant(entry) => { - let alloc = RefBump::clone(&alloc); - let key = RefBump::map(alloc, |a| a.alloc_slice_copy(key)); entry.insert_hashed_nocheck( hash, - RefBytes(key), + alloc.alloc_slice_copy(key), DelAddRoaringBitmap::new_del_u32(n), ); } @@ -158,23 +149,21 @@ impl<'extractor> NormalCaches<'extractor> { pub fn insert_add_u32( &mut self, hasher: &DefaultHashBuilder, - alloc: &RefBump<'extractor>, + alloc: &'extractor Bump, buckets: usize, key: &[u8], n: u32, ) { let hash = compute_bytes_hash(hasher, key); let bucket = compute_bucket_from_hash(buckets, hash); - match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k.as_ref() == key) { + match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k == key) { RawEntryMut::Occupied(mut entry) => { entry.get_mut().add.get_or_insert_with(RoaringBitmap::default).insert(n); } RawEntryMut::Vacant(entry) => { - let alloc = RefBump::clone(&alloc); - let key = RefBump::map(alloc, |a| a.alloc_slice_copy(key)); entry.insert_hashed_nocheck( hash, - RefBytes(key), + alloc.alloc_slice_copy(key), DelAddRoaringBitmap::new_add_u32(n), ); } @@ -183,9 +172,8 @@ impl<'extractor> NormalCaches<'extractor> { } struct SpillingCaches<'extractor> { - caches: Vec< - HashMap, DelAddRoaringBitmap, DefaultHashBuilder, RefBump<'extractor>>, - >, + caches: + Vec>, // TODO it must be a grenad Sorter with a DelAddCboRoaringBitmapCodec spilled_entries: Vec, deladd_buffer: Vec, @@ -248,6 +236,7 @@ impl<'extractor> SpillingCaches<'extractor> { } } +#[inline] fn compute_bytes_hash(hash_builder: &S, key: &[u8]) -> u64 { use std::hash::{Hash, Hasher}; let mut state = hash_builder.build_hasher(); @@ -255,6 +244,7 @@ fn compute_bytes_hash(hash_builder: &S, key: &[u8]) -> u64 { state.finish() } +#[inline] fn compute_bucket_from_hash(buckets: usize, hash: u64) -> usize { hash as usize % buckets }