mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-10-30 01:38:49 +01:00
Use the bumpalo Bump instead of the RefBump/RefBytes
This commit is contained in:
parent
b7e106b34a
commit
2444ddbd3d
@ -1,17 +1,14 @@
|
||||
use std::borrow::BorrowMut;
|
||||
use std::fs::File;
|
||||
use std::hash::BuildHasher;
|
||||
use std::io::{self, BufReader, BufWriter, Read as _, Seek, Write as _};
|
||||
use std::vec;
|
||||
|
||||
use bumpalo::Bump;
|
||||
use hashbrown::hash_map::RawEntryMut;
|
||||
use hashbrown::{DefaultHashBuilder, HashMap};
|
||||
use raw_collections::alloc::{RefBump, RefBytes};
|
||||
use roaring::RoaringBitmap;
|
||||
use tempfile::tempfile;
|
||||
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||
use crate::update::new::indexer::document_changes::MostlySend;
|
||||
use crate::CboRoaringBitmapCodec;
|
||||
|
||||
// # How the Merge Algorithm works
|
||||
@ -60,7 +57,7 @@ use crate::CboRoaringBitmapCodec;
|
||||
|
||||
/// Cache front-end that bundles a hash builder, a bump-allocator handle and
/// the bucketed caches (`InnerCaches`).
///
/// NOTE(review): this page is a diff rendering whose +/- markers were lost.
/// The two `alloc` lines below are the removed and the added version of the
/// same field, not two separate fields.
pub struct CboCachedSorter<'extractor> {
|
||||
// Hash builder, default-constructed in `new_in`.
// Presumably the builder handed to the cache insert methods; confirm in the full file.
hasher: DefaultHashBuilder,
|
||||
// Pre-commit version of the field (removed): reference-counted `RefBump` handle.
alloc: RefBump<'extractor>,
|
||||
// Post-commit version of the field (added): plain shared borrow of the bumpalo arena.
alloc: &'extractor Bump,
|
||||
// Bucketed caches; see `InnerCaches` for the available variants.
caches: InnerCaches<'extractor>,
|
||||
}
|
||||
|
||||
@ -70,14 +67,11 @@ enum InnerCaches<'extractor> {
|
||||
}
|
||||
|
||||
impl<'extractor> CboCachedSorter<'extractor> {
|
||||
pub fn new_in(buckets: usize, alloc: RefBump<'extractor>) -> Self {
|
||||
pub fn new_in(buckets: usize, alloc: &'extractor Bump) -> Self {
|
||||
Self {
|
||||
hasher: DefaultHashBuilder::default(),
|
||||
caches: InnerCaches::Normal(NormalCaches {
|
||||
caches: std::iter::repeat_with(|| RefBump::clone(&alloc))
|
||||
.map(HashMap::new_in)
|
||||
.take(buckets)
|
||||
.collect(),
|
||||
caches: std::iter::repeat_with(|| HashMap::new_in(alloc)).take(buckets).collect(),
|
||||
}),
|
||||
alloc,
|
||||
}
|
||||
@ -122,16 +116,15 @@ impl<'extractor> CboCachedSorter<'extractor> {
|
||||
}
|
||||
|
||||
/// In-memory caches: one hash map per bucket, each mapping a key (raw bytes)
/// to its `DelAddRoaringBitmap`.
///
/// NOTE(review): this page is a diff rendering whose +/- markers were lost.
/// The two `caches` declarations below are the removed and the added version
/// of the same field.
struct NormalCaches<'extractor> {
|
||||
// Pre-commit version (removed): keys wrapped in `RefBytes`, maps allocated through `RefBump`.
caches: Vec<
|
||||
HashMap<RefBytes<'extractor>, DelAddRoaringBitmap, DefaultHashBuilder, RefBump<'extractor>>,
|
||||
>,
|
||||
// Post-commit version (added): keys are byte slices copied into the bump arena,
// and the maps themselves allocate from `&'extractor Bump`.
caches:
|
||||
Vec<HashMap<&'extractor [u8], DelAddRoaringBitmap, DefaultHashBuilder, &'extractor Bump>>,
|
||||
}
|
||||
|
||||
impl<'extractor> NormalCaches<'extractor> {
|
||||
pub fn insert_del_u32(
|
||||
&mut self,
|
||||
hasher: &DefaultHashBuilder,
|
||||
alloc: &RefBump<'extractor>,
|
||||
alloc: &'extractor Bump,
|
||||
buckets: usize,
|
||||
key: &[u8],
|
||||
n: u32,
|
||||
@ -139,16 +132,14 @@ impl<'extractor> NormalCaches<'extractor> {
|
||||
let hash = compute_bytes_hash(hasher, key);
|
||||
let bucket = compute_bucket_from_hash(buckets, hash);
|
||||
|
||||
match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k.as_ref() == key) {
|
||||
match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k == key) {
|
||||
RawEntryMut::Occupied(mut entry) => {
|
||||
entry.get_mut().del.get_or_insert_with(RoaringBitmap::default).insert(n);
|
||||
}
|
||||
RawEntryMut::Vacant(entry) => {
|
||||
let alloc = RefBump::clone(&alloc);
|
||||
let key = RefBump::map(alloc, |a| a.alloc_slice_copy(key));
|
||||
entry.insert_hashed_nocheck(
|
||||
hash,
|
||||
RefBytes(key),
|
||||
alloc.alloc_slice_copy(key),
|
||||
DelAddRoaringBitmap::new_del_u32(n),
|
||||
);
|
||||
}
|
||||
@ -158,23 +149,21 @@ impl<'extractor> NormalCaches<'extractor> {
|
||||
pub fn insert_add_u32(
|
||||
&mut self,
|
||||
hasher: &DefaultHashBuilder,
|
||||
alloc: &RefBump<'extractor>,
|
||||
alloc: &'extractor Bump,
|
||||
buckets: usize,
|
||||
key: &[u8],
|
||||
n: u32,
|
||||
) {
|
||||
let hash = compute_bytes_hash(hasher, key);
|
||||
let bucket = compute_bucket_from_hash(buckets, hash);
|
||||
match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k.as_ref() == key) {
|
||||
match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k == key) {
|
||||
RawEntryMut::Occupied(mut entry) => {
|
||||
entry.get_mut().add.get_or_insert_with(RoaringBitmap::default).insert(n);
|
||||
}
|
||||
RawEntryMut::Vacant(entry) => {
|
||||
let alloc = RefBump::clone(&alloc);
|
||||
let key = RefBump::map(alloc, |a| a.alloc_slice_copy(key));
|
||||
entry.insert_hashed_nocheck(
|
||||
hash,
|
||||
RefBytes(key),
|
||||
alloc.alloc_slice_copy(key),
|
||||
DelAddRoaringBitmap::new_add_u32(n),
|
||||
);
|
||||
}
|
||||
@ -183,9 +172,8 @@ impl<'extractor> NormalCaches<'extractor> {
|
||||
}
|
||||
|
||||
struct SpillingCaches<'extractor> {
|
||||
caches: Vec<
|
||||
HashMap<RefBytes<'extractor>, DelAddRoaringBitmap, DefaultHashBuilder, RefBump<'extractor>>,
|
||||
>,
|
||||
caches:
|
||||
Vec<HashMap<&'extractor [u8], DelAddRoaringBitmap, DefaultHashBuilder, &'extractor Bump>>,
|
||||
// TODO it must be a grenad Sorter with a DelAddCboRoaringBitmapCodec
|
||||
spilled_entries: Vec<UnorderedEntries>,
|
||||
deladd_buffer: Vec<u8>,
|
||||
@ -248,6 +236,7 @@ impl<'extractor> SpillingCaches<'extractor> {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn compute_bytes_hash<S: BuildHasher>(hash_builder: &S, key: &[u8]) -> u64 {
|
||||
use std::hash::{Hash, Hasher};
|
||||
let mut state = hash_builder.build_hasher();
|
||||
@ -255,6 +244,7 @@ fn compute_bytes_hash<S: BuildHasher>(hash_builder: &S, key: &[u8]) -> u64 {
|
||||
state.finish()
|
||||
}
|
||||
|
||||
/// Maps a 64-bit key hash onto a bucket index in `0..buckets`.
///
/// The reduction is performed in `u64`, so every bit of `hash` takes part in
/// the result regardless of the target's pointer width. On 64-bit targets the
/// result is identical to `hash as usize % buckets`.
///
/// # Panics
///
/// Panics if `buckets` is zero (remainder by zero).
#[inline]
fn compute_bucket_from_hash(buckets: usize, hash: u64) -> usize {
    // `usize` is at most 64 bits wide on supported targets, so widening is lossless.
    let bucket_count = buckets as u64;
    // The remainder is strictly smaller than `buckets`, so it always fits in a `usize`.
    (hash % bucket_count) as usize
}
|
||||
|
Loading…
Reference in New Issue
Block a user