mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-22 20:50:04 +01:00
Introduce a new custom Lru
This commit is contained in:
parent
3f7a500f3b
commit
759b9b1546
11
Cargo.lock
generated
11
Cargo.lock
generated
@ -2307,9 +2307,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.14.3"
|
||||
version = "0.14.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
|
||||
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"allocator-api2",
|
||||
@ -2591,7 +2591,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown 0.14.3",
|
||||
"hashbrown 0.14.5",
|
||||
"serde",
|
||||
]
|
||||
|
||||
@ -3318,7 +3318,7 @@ version = "0.12.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37ee39891760e7d94734f6f63fedc29a2e4a152f836120753a72503f09fcf904"
|
||||
dependencies = [
|
||||
"hashbrown 0.14.3",
|
||||
"hashbrown 0.14.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -3575,6 +3575,7 @@ dependencies = [
|
||||
"fxhash",
|
||||
"geoutils",
|
||||
"grenad",
|
||||
"hashbrown 0.14.5",
|
||||
"heed",
|
||||
"hf-hub",
|
||||
"indexmap",
|
||||
@ -6049,7 +6050,7 @@ version = "0.16.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "471d1c1645d361eb782a1650b1786a8fb58dd625e681a04c09f5ff7c8764a7b0"
|
||||
dependencies = [
|
||||
"hashbrown 0.14.3",
|
||||
"hashbrown 0.14.5",
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
|
@ -89,6 +89,7 @@ tracing = "0.1.40"
|
||||
ureq = { version = "2.10.0", features = ["json"] }
|
||||
url = "2.5.2"
|
||||
rayon-par-bridge = "0.1.0"
|
||||
hashbrown = "0.14.5"
|
||||
|
||||
[dev-dependencies]
|
||||
mimalloc = { version = "0.1.43", default-features = false }
|
||||
|
230
milli/src/update/new/lru.rs
Normal file
230
milli/src/update/new/lru.rs
Normal file
@ -0,0 +1,230 @@
|
||||
use std::borrow::Borrow;
|
||||
use std::hash::{BuildHasher, Hash};
|
||||
use std::iter::repeat_with;
|
||||
use std::mem;
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
use hashbrown::hash_map::{DefaultHashBuilder, Entry};
|
||||
use hashbrown::HashMap;
|
||||
|
||||
pub struct Lru<K, V, S = DefaultHashBuilder> {
|
||||
lookup: HashMap<K, usize, S>,
|
||||
storage: FixedSizeList<LruNode<K, V>>,
|
||||
}
|
||||
|
||||
impl<K: Eq + Hash, V> Lru<K, V> {
|
||||
/// Creates a new LRU cache that holds at most `capacity` elements.
|
||||
pub fn new(capacity: NonZeroUsize) -> Self {
|
||||
Self { lookup: HashMap::new(), storage: FixedSizeList::new(capacity.get()) }
|
||||
}
|
||||
}
|
||||
|
||||
impl<K: Eq + Hash, V, S: BuildHasher> Lru<K, V, S> {
|
||||
/// Creates a new LRU cache that holds at most `capacity` elements
|
||||
/// and uses the provided hash builder to hash keys.
|
||||
pub fn with_hasher(capacity: NonZeroUsize, hash_builder: S) -> Lru<K, V, S> {
|
||||
Self {
|
||||
lookup: HashMap::with_hasher(hash_builder),
|
||||
storage: FixedSizeList::new(capacity.get()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<K: Eq + Hash, V, S: BuildHasher> Lru<K, V, S> {
|
||||
/// Returns a mutable reference to the value of the key in the cache or `None` if it is not present in the cache.
|
||||
///
|
||||
/// Moves the key to the head of the LRU list if it exists.
|
||||
pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
|
||||
where
|
||||
K: Borrow<Q>,
|
||||
Q: Hash + Eq + ?Sized,
|
||||
{
|
||||
let idx = *self.lookup.get(key)?;
|
||||
self.storage.move_front(idx).map(|node| &mut node.value)
|
||||
}
|
||||
}
|
||||
|
||||
impl<K: Clone + Eq + Hash, V, S: BuildHasher> Lru<K, V, S> {
|
||||
pub fn push(&mut self, key: K, value: V) -> Option<(K, V)> {
|
||||
match self.lookup.entry(key) {
|
||||
Entry::Occupied(occ) => {
|
||||
// It's fine to unwrap here because:
|
||||
// * the entry already exists
|
||||
let node = self.storage.move_front(*occ.get()).unwrap();
|
||||
let old_value = mem::replace(&mut node.value, value);
|
||||
let old_key = occ.replace_key();
|
||||
Some((old_key, old_value))
|
||||
}
|
||||
Entry::Vacant(vac) => {
|
||||
let key = vac.key().clone();
|
||||
if self.storage.is_full() {
|
||||
let idx = self.storage.back_idx();
|
||||
// It's fine to unwrap here because:
|
||||
// * the cache capacity is non zero
|
||||
// * the cache is full
|
||||
let node = self.storage.move_front(idx).unwrap();
|
||||
let LruNode { key, value } = mem::replace(node, LruNode { key, value });
|
||||
vac.insert(idx);
|
||||
self.lookup.remove(&key);
|
||||
Some((key, value))
|
||||
} else {
|
||||
// It's fine to unwrap here because:
|
||||
// * the cache capacity is non zero
|
||||
// * the cache is not full
|
||||
let (idx, _) = self.storage.push_front(LruNode { key, value }).unwrap();
|
||||
vac.insert(idx);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<K, V, S> IntoIterator for Lru<K, V, S> {
|
||||
type Item = (K, V);
|
||||
type IntoIter = IntoIter<K, V>;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
IntoIter { lookup_iter: self.lookup.into_iter(), nodes: self.storage.nodes }
|
||||
}
|
||||
}
|
||||
|
||||
pub struct IntoIter<K, V> {
|
||||
lookup_iter: hashbrown::hash_map::IntoIter<K, usize>,
|
||||
nodes: Box<[Option<FixedSizeListNode<LruNode<K, V>>>]>,
|
||||
}
|
||||
|
||||
impl<K, V> Iterator for IntoIter<K, V> {
|
||||
type Item = (K, V);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let (_key, idx) = self.lookup_iter.next()?;
|
||||
let LruNode { key, value } = self.nodes.get_mut(idx)?.take()?.data;
|
||||
Some((key, value))
|
||||
}
|
||||
}
|
||||
|
||||
/// A cached entry: the key together with the value stored under it.
struct LruNode<K, V> {
    /// The key of the entry.
    key: K,
    /// The value of the entry.
    value: V,
}
|
||||
|
||||
/// A slot of a [`FixedSizeList`]: the payload plus the slot indices of its
/// neighbours. `usize::MAX` means "no neighbour".
struct FixedSizeListNode<T> {
    prev: usize,
    next: usize,
    data: T,
}

/// A doubly linked list whose nodes live in a fixed-size slice of slots.
///
/// Nodes are only ever added or moved to the front, never removed.
struct FixedSizeList<T> {
    nodes: Box<[Option<FixedSizeListNode<T>>]>,
    // Number of slots handed out so far. Because nodes are never removed,
    // slots are handed out sequentially: `0..used` are occupied and
    // `used..capacity` are still `None`, so no separate free list is needed.
    used: usize,
    // TODO: we probably do not need one of the front and back cursors.
    front: usize,
    back: usize,
}

impl<T> FixedSizeList<T> {
    /// Index value meaning "no node".
    const NIL: usize = usize::MAX;

    /// Creates an empty list with room for `capacity` nodes.
    fn new(capacity: usize) -> Self {
        Self {
            nodes: repeat_with(|| None).take(capacity).collect::<Vec<_>>().into_boxed_slice(),
            used: 0,
            front: Self::NIL,
            back: Self::NIL,
        }
    }

    /// Maximum number of nodes the list can hold.
    #[inline]
    fn capacity(&self) -> usize {
        self.nodes.len()
    }

    /// Number of nodes currently in the list.
    #[inline]
    fn len(&self) -> usize {
        self.used
    }

    /// Returns `true` when the list holds no node.
    #[inline]
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns `true` when every slot is occupied.
    #[inline]
    fn is_full(&self) -> bool {
        self.len() == self.capacity()
    }

    /// Slot index of the last node, or `usize::MAX` when the list is empty.
    #[inline]
    fn back_idx(&self) -> usize {
        self.back
    }

    /// Reserves the next unused slot, or returns `None` when the list is full.
    #[inline]
    fn next(&mut self) -> Option<usize> {
        let idx = self.used;
        if idx < self.capacity() {
            self.used += 1;
            Some(idx)
        } else {
            None
        }
    }

    /// Mutable access to the node in slot `idx`, if that slot exists and is occupied.
    #[inline]
    fn node_mut(&mut self, idx: usize) -> Option<&mut FixedSizeListNode<T>> {
        self.nodes.get_mut(idx).and_then(|node| node.as_mut())
    }

    /// Shared access to the node in slot `idx`, if that slot exists and is occupied.
    #[inline]
    fn node_ref(&self, idx: usize) -> Option<&FixedSizeListNode<T>> {
        self.nodes.get(idx).and_then(|node| node.as_ref())
    }

    /// Moves the node in slot `idx` to the front of the list and returns its data.
    ///
    /// Returns `None`, leaving the list untouched, when `idx` is out of range
    /// or the slot is empty.
    #[inline]
    fn move_front(&mut self, idx: usize) -> Option<&mut T> {
        // Detach the node: its slot stays empty until `link_front` refills it.
        let FixedSizeListNode { prev, next, data } = self.nodes.get_mut(idx)?.take()?;

        // Bridge the neighbours over the gap; a missing neighbour means the
        // node was the front (resp. the back) and the cursor must follow.
        match self.node_mut(prev) {
            Some(before) => before.next = next,
            None => self.front = next,
        }
        match self.node_mut(next) {
            Some(after) => after.prev = prev,
            None => self.back = prev,
        }

        Some(self.link_front(idx, data))
    }

    /// Stores `data` in a fresh slot at the front of the list.
    ///
    /// Returns the slot index with a reference to the stored data, or `None`
    /// when the list is full.
    #[inline]
    fn push_front(&mut self, data: T) -> Option<(usize, &mut T)> {
        let idx = self.next()?;
        Some((idx, self.link_front(idx, data)))
    }

    /// Writes `data` into the empty slot `idx` and links it as the new front.
    ///
    /// Callers must pass an in-bounds index whose slot is currently empty.
    fn link_front(&mut self, idx: usize, data: T) -> &mut T {
        if let Some(head) = self.node_mut(self.front) {
            head.prev = idx;
        }
        // No valid back node means the list is empty right now (possibly
        // because its only node was just detached): the new node is also the back.
        if self.node_ref(self.back).is_none() {
            self.back = idx;
        }

        let next = self.front;
        self.front = idx;
        let slot = self.nodes.get_mut(idx).expect("slot index passed to link_front is in bounds");
        let node = slot.insert(FixedSizeListNode { prev: Self::NIL, next, data });
        &mut node.data
    }
}
|
@ -10,6 +10,7 @@ mod document_change;
|
||||
mod extract;
|
||||
pub mod indexer;
|
||||
mod items_pool;
|
||||
mod lru;
|
||||
mod merger;
|
||||
mod top_level_map;
|
||||
mod word_fst_builder;
|
||||
|
Loading…
x
Reference in New Issue
Block a user