mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-10-30 01:38:49 +01:00
Use the bumpalo Bump instead of the RefBump/RefBytes
This commit is contained in:
parent
b7e106b34a
commit
2444ddbd3d
@ -1,17 +1,14 @@
|
|||||||
use std::borrow::BorrowMut;
|
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::hash::BuildHasher;
|
use std::hash::BuildHasher;
|
||||||
use std::io::{self, BufReader, BufWriter, Read as _, Seek, Write as _};
|
use std::io::{self, BufReader, BufWriter, Read as _, Seek, Write as _};
|
||||||
use std::vec;
|
use std::vec;
|
||||||
|
|
||||||
|
use bumpalo::Bump;
|
||||||
use hashbrown::hash_map::RawEntryMut;
|
use hashbrown::hash_map::RawEntryMut;
|
||||||
use hashbrown::{DefaultHashBuilder, HashMap};
|
use hashbrown::{DefaultHashBuilder, HashMap};
|
||||||
use raw_collections::alloc::{RefBump, RefBytes};
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use tempfile::tempfile;
|
|
||||||
|
|
||||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||||
use crate::update::new::indexer::document_changes::MostlySend;
|
|
||||||
use crate::CboRoaringBitmapCodec;
|
use crate::CboRoaringBitmapCodec;
|
||||||
|
|
||||||
// # How the Merge Algorithm works
|
// # How the Merge Algorithm works
|
||||||
@ -60,7 +57,7 @@ use crate::CboRoaringBitmapCodec;
|
|||||||
|
|
||||||
pub struct CboCachedSorter<'extractor> {
|
pub struct CboCachedSorter<'extractor> {
|
||||||
hasher: DefaultHashBuilder,
|
hasher: DefaultHashBuilder,
|
||||||
alloc: RefBump<'extractor>,
|
alloc: &'extractor Bump,
|
||||||
caches: InnerCaches<'extractor>,
|
caches: InnerCaches<'extractor>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -70,14 +67,11 @@ enum InnerCaches<'extractor> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'extractor> CboCachedSorter<'extractor> {
|
impl<'extractor> CboCachedSorter<'extractor> {
|
||||||
pub fn new_in(buckets: usize, alloc: RefBump<'extractor>) -> Self {
|
pub fn new_in(buckets: usize, alloc: &'extractor Bump) -> Self {
|
||||||
Self {
|
Self {
|
||||||
hasher: DefaultHashBuilder::default(),
|
hasher: DefaultHashBuilder::default(),
|
||||||
caches: InnerCaches::Normal(NormalCaches {
|
caches: InnerCaches::Normal(NormalCaches {
|
||||||
caches: std::iter::repeat_with(|| RefBump::clone(&alloc))
|
caches: std::iter::repeat_with(|| HashMap::new_in(alloc)).take(buckets).collect(),
|
||||||
.map(HashMap::new_in)
|
|
||||||
.take(buckets)
|
|
||||||
.collect(),
|
|
||||||
}),
|
}),
|
||||||
alloc,
|
alloc,
|
||||||
}
|
}
|
||||||
@ -122,16 +116,15 @@ impl<'extractor> CboCachedSorter<'extractor> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct NormalCaches<'extractor> {
|
struct NormalCaches<'extractor> {
|
||||||
caches: Vec<
|
caches:
|
||||||
HashMap<RefBytes<'extractor>, DelAddRoaringBitmap, DefaultHashBuilder, RefBump<'extractor>>,
|
Vec<HashMap<&'extractor [u8], DelAddRoaringBitmap, DefaultHashBuilder, &'extractor Bump>>,
|
||||||
>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'extractor> NormalCaches<'extractor> {
|
impl<'extractor> NormalCaches<'extractor> {
|
||||||
pub fn insert_del_u32(
|
pub fn insert_del_u32(
|
||||||
&mut self,
|
&mut self,
|
||||||
hasher: &DefaultHashBuilder,
|
hasher: &DefaultHashBuilder,
|
||||||
alloc: &RefBump<'extractor>,
|
alloc: &'extractor Bump,
|
||||||
buckets: usize,
|
buckets: usize,
|
||||||
key: &[u8],
|
key: &[u8],
|
||||||
n: u32,
|
n: u32,
|
||||||
@ -139,16 +132,14 @@ impl<'extractor> NormalCaches<'extractor> {
|
|||||||
let hash = compute_bytes_hash(hasher, key);
|
let hash = compute_bytes_hash(hasher, key);
|
||||||
let bucket = compute_bucket_from_hash(buckets, hash);
|
let bucket = compute_bucket_from_hash(buckets, hash);
|
||||||
|
|
||||||
match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k.as_ref() == key) {
|
match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k == key) {
|
||||||
RawEntryMut::Occupied(mut entry) => {
|
RawEntryMut::Occupied(mut entry) => {
|
||||||
entry.get_mut().del.get_or_insert_with(RoaringBitmap::default).insert(n);
|
entry.get_mut().del.get_or_insert_with(RoaringBitmap::default).insert(n);
|
||||||
}
|
}
|
||||||
RawEntryMut::Vacant(entry) => {
|
RawEntryMut::Vacant(entry) => {
|
||||||
let alloc = RefBump::clone(&alloc);
|
|
||||||
let key = RefBump::map(alloc, |a| a.alloc_slice_copy(key));
|
|
||||||
entry.insert_hashed_nocheck(
|
entry.insert_hashed_nocheck(
|
||||||
hash,
|
hash,
|
||||||
RefBytes(key),
|
alloc.alloc_slice_copy(key),
|
||||||
DelAddRoaringBitmap::new_del_u32(n),
|
DelAddRoaringBitmap::new_del_u32(n),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -158,23 +149,21 @@ impl<'extractor> NormalCaches<'extractor> {
|
|||||||
pub fn insert_add_u32(
|
pub fn insert_add_u32(
|
||||||
&mut self,
|
&mut self,
|
||||||
hasher: &DefaultHashBuilder,
|
hasher: &DefaultHashBuilder,
|
||||||
alloc: &RefBump<'extractor>,
|
alloc: &'extractor Bump,
|
||||||
buckets: usize,
|
buckets: usize,
|
||||||
key: &[u8],
|
key: &[u8],
|
||||||
n: u32,
|
n: u32,
|
||||||
) {
|
) {
|
||||||
let hash = compute_bytes_hash(hasher, key);
|
let hash = compute_bytes_hash(hasher, key);
|
||||||
let bucket = compute_bucket_from_hash(buckets, hash);
|
let bucket = compute_bucket_from_hash(buckets, hash);
|
||||||
match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k.as_ref() == key) {
|
match self.caches[bucket].raw_entry_mut().from_hash(hash, |k| k == key) {
|
||||||
RawEntryMut::Occupied(mut entry) => {
|
RawEntryMut::Occupied(mut entry) => {
|
||||||
entry.get_mut().add.get_or_insert_with(RoaringBitmap::default).insert(n);
|
entry.get_mut().add.get_or_insert_with(RoaringBitmap::default).insert(n);
|
||||||
}
|
}
|
||||||
RawEntryMut::Vacant(entry) => {
|
RawEntryMut::Vacant(entry) => {
|
||||||
let alloc = RefBump::clone(&alloc);
|
|
||||||
let key = RefBump::map(alloc, |a| a.alloc_slice_copy(key));
|
|
||||||
entry.insert_hashed_nocheck(
|
entry.insert_hashed_nocheck(
|
||||||
hash,
|
hash,
|
||||||
RefBytes(key),
|
alloc.alloc_slice_copy(key),
|
||||||
DelAddRoaringBitmap::new_add_u32(n),
|
DelAddRoaringBitmap::new_add_u32(n),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -183,9 +172,8 @@ impl<'extractor> NormalCaches<'extractor> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct SpillingCaches<'extractor> {
|
struct SpillingCaches<'extractor> {
|
||||||
caches: Vec<
|
caches:
|
||||||
HashMap<RefBytes<'extractor>, DelAddRoaringBitmap, DefaultHashBuilder, RefBump<'extractor>>,
|
Vec<HashMap<&'extractor [u8], DelAddRoaringBitmap, DefaultHashBuilder, &'extractor Bump>>,
|
||||||
>,
|
|
||||||
// TODO it must be a grenad Sorter with a DelAddCboRoaringBitmapCodec
|
// TODO it must be a grenad Sorter with a DelAddCboRoaringBitmapCodec
|
||||||
spilled_entries: Vec<UnorderedEntries>,
|
spilled_entries: Vec<UnorderedEntries>,
|
||||||
deladd_buffer: Vec<u8>,
|
deladd_buffer: Vec<u8>,
|
||||||
@ -248,6 +236,7 @@ impl<'extractor> SpillingCaches<'extractor> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn compute_bytes_hash<S: BuildHasher>(hash_builder: &S, key: &[u8]) -> u64 {
|
fn compute_bytes_hash<S: BuildHasher>(hash_builder: &S, key: &[u8]) -> u64 {
|
||||||
use std::hash::{Hash, Hasher};
|
use std::hash::{Hash, Hasher};
|
||||||
let mut state = hash_builder.build_hasher();
|
let mut state = hash_builder.build_hasher();
|
||||||
@ -255,6 +244,7 @@ fn compute_bytes_hash<S: BuildHasher>(hash_builder: &S, key: &[u8]) -> u64 {
|
|||||||
state.finish()
|
state.finish()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn compute_bucket_from_hash(buckets: usize, hash: u64) -> usize {
|
fn compute_bucket_from_hash(buckets: usize, hash: u64) -> usize {
|
||||||
hash as usize % buckets
|
hash as usize % buckets
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user