mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 05:00:06 +01:00
Introduce a new custom Lru
This commit is contained in:
parent
3f7a500f3b
commit
759b9b1546
11
Cargo.lock
generated
11
Cargo.lock
generated
@ -2307,9 +2307,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hashbrown"
|
name = "hashbrown"
|
||||||
version = "0.14.3"
|
version = "0.14.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
|
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahash 0.8.11",
|
"ahash 0.8.11",
|
||||||
"allocator-api2",
|
"allocator-api2",
|
||||||
@ -2591,7 +2591,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
|
checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"equivalent",
|
"equivalent",
|
||||||
"hashbrown 0.14.3",
|
"hashbrown 0.14.5",
|
||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -3318,7 +3318,7 @@ version = "0.12.4"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "37ee39891760e7d94734f6f63fedc29a2e4a152f836120753a72503f09fcf904"
|
checksum = "37ee39891760e7d94734f6f63fedc29a2e4a152f836120753a72503f09fcf904"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hashbrown 0.14.3",
|
"hashbrown 0.14.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -3575,6 +3575,7 @@ dependencies = [
|
|||||||
"fxhash",
|
"fxhash",
|
||||||
"geoutils",
|
"geoutils",
|
||||||
"grenad",
|
"grenad",
|
||||||
|
"hashbrown 0.14.5",
|
||||||
"heed",
|
"heed",
|
||||||
"hf-hub",
|
"hf-hub",
|
||||||
"indexmap",
|
"indexmap",
|
||||||
@ -6049,7 +6050,7 @@ version = "0.16.4"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "471d1c1645d361eb782a1650b1786a8fb58dd625e681a04c09f5ff7c8764a7b0"
|
checksum = "471d1c1645d361eb782a1650b1786a8fb58dd625e681a04c09f5ff7c8764a7b0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hashbrown 0.14.3",
|
"hashbrown 0.14.5",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -89,6 +89,7 @@ tracing = "0.1.40"
|
|||||||
ureq = { version = "2.10.0", features = ["json"] }
|
ureq = { version = "2.10.0", features = ["json"] }
|
||||||
url = "2.5.2"
|
url = "2.5.2"
|
||||||
rayon-par-bridge = "0.1.0"
|
rayon-par-bridge = "0.1.0"
|
||||||
|
hashbrown = "0.14.5"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
mimalloc = { version = "0.1.43", default-features = false }
|
mimalloc = { version = "0.1.43", default-features = false }
|
||||||
|
230
milli/src/update/new/lru.rs
Normal file
230
milli/src/update/new/lru.rs
Normal file
@ -0,0 +1,230 @@
|
|||||||
|
use std::borrow::Borrow;
|
||||||
|
use std::hash::{BuildHasher, Hash};
|
||||||
|
use std::iter::repeat_with;
|
||||||
|
use std::mem;
|
||||||
|
use std::num::NonZeroUsize;
|
||||||
|
|
||||||
|
use hashbrown::hash_map::{DefaultHashBuilder, Entry};
|
||||||
|
use hashbrown::HashMap;
|
||||||
|
|
||||||
|
pub struct Lru<K, V, S = DefaultHashBuilder> {
|
||||||
|
lookup: HashMap<K, usize, S>,
|
||||||
|
storage: FixedSizeList<LruNode<K, V>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<K: Eq + Hash, V> Lru<K, V> {
|
||||||
|
/// Creates a new LRU cache that holds at most `capacity` elements.
|
||||||
|
pub fn new(capacity: NonZeroUsize) -> Self {
|
||||||
|
Self { lookup: HashMap::new(), storage: FixedSizeList::new(capacity.get()) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<K: Eq + Hash, V, S: BuildHasher> Lru<K, V, S> {
|
||||||
|
/// Creates a new LRU cache that holds at most `capacity` elements
|
||||||
|
/// and uses the provided hash builder to hash keys.
|
||||||
|
pub fn with_hasher(capacity: NonZeroUsize, hash_builder: S) -> Lru<K, V, S> {
|
||||||
|
Self {
|
||||||
|
lookup: HashMap::with_hasher(hash_builder),
|
||||||
|
storage: FixedSizeList::new(capacity.get()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<K: Eq + Hash, V, S: BuildHasher> Lru<K, V, S> {
|
||||||
|
/// Returns a mutable reference to the value of the key in the cache or `None` if it is not present in the cache.
|
||||||
|
///
|
||||||
|
/// Moves the key to the head of the LRU list if it exists.
|
||||||
|
pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
|
||||||
|
where
|
||||||
|
K: Borrow<Q>,
|
||||||
|
Q: Hash + Eq + ?Sized,
|
||||||
|
{
|
||||||
|
let idx = *self.lookup.get(key)?;
|
||||||
|
self.storage.move_front(idx).map(|node| &mut node.value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<K: Clone + Eq + Hash, V, S: BuildHasher> Lru<K, V, S> {
|
||||||
|
pub fn push(&mut self, key: K, value: V) -> Option<(K, V)> {
|
||||||
|
match self.lookup.entry(key) {
|
||||||
|
Entry::Occupied(occ) => {
|
||||||
|
// It's fine to unwrap here because:
|
||||||
|
// * the entry already exists
|
||||||
|
let node = self.storage.move_front(*occ.get()).unwrap();
|
||||||
|
let old_value = mem::replace(&mut node.value, value);
|
||||||
|
let old_key = occ.replace_key();
|
||||||
|
Some((old_key, old_value))
|
||||||
|
}
|
||||||
|
Entry::Vacant(vac) => {
|
||||||
|
let key = vac.key().clone();
|
||||||
|
if self.storage.is_full() {
|
||||||
|
let idx = self.storage.back_idx();
|
||||||
|
// It's fine to unwrap here because:
|
||||||
|
// * the cache capacity is non zero
|
||||||
|
// * the cache is full
|
||||||
|
let node = self.storage.move_front(idx).unwrap();
|
||||||
|
let LruNode { key, value } = mem::replace(node, LruNode { key, value });
|
||||||
|
vac.insert(idx);
|
||||||
|
self.lookup.remove(&key);
|
||||||
|
Some((key, value))
|
||||||
|
} else {
|
||||||
|
// It's fine to unwrap here because:
|
||||||
|
// * the cache capacity is non zero
|
||||||
|
// * the cache is not full
|
||||||
|
let (idx, _) = self.storage.push_front(LruNode { key, value }).unwrap();
|
||||||
|
vac.insert(idx);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<K, V, S> IntoIterator for Lru<K, V, S> {
|
||||||
|
type Item = (K, V);
|
||||||
|
type IntoIter = IntoIter<K, V>;
|
||||||
|
|
||||||
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
|
IntoIter { lookup_iter: self.lookup.into_iter(), nodes: self.storage.nodes }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct IntoIter<K, V> {
|
||||||
|
lookup_iter: hashbrown::hash_map::IntoIter<K, usize>,
|
||||||
|
nodes: Box<[Option<FixedSizeListNode<LruNode<K, V>>>]>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<K, V> Iterator for IntoIter<K, V> {
|
||||||
|
type Item = (K, V);
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
let (_key, idx) = self.lookup_iter.next()?;
|
||||||
|
let LruNode { key, value } = self.nodes.get_mut(idx)?.take()?.data;
|
||||||
|
Some((key, value))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The payload stored in every node of the LRU list: a cached key/value pair.
///
/// The key is kept next to the value so that an evicted node can be removed
/// from the lookup map and handed back to the caller.
struct LruNode<K, V> {
    key: K,
    value: V,
}
|
||||||
|
|
||||||
|
/// A slot of a [`FixedSizeList`]: the payload and the indices of its neighbours.
///
/// `usize::MAX` is the "no neighbour" marker for both `prev` and `next`.
struct FixedSizeListNode<T> {
    /// Index of the neighbour towards the front, `usize::MAX` for the front node.
    prev: usize,
    /// Index of the neighbour towards the back, `usize::MAX` for the back node.
    next: usize,
    data: T,
}

/// A doubly linked list stored in a fixed-size slice of slots.
///
/// Nodes reference each other by index instead of by pointer, and the number
/// of slots is decided once, at construction time.
struct FixedSizeList<T> {
    nodes: Box<[Option<FixedSizeListNode<T>>]>,
    // An unordered set of indices that are not in use in `nodes`.
    // All `None` entries in `nodes` _must_ be listed in `free`.
    // A `Vec<usize>` was chosen in order to have O(1) complexity
    // for pop and avoid having to go through `nodes` in order
    // to find a free place.
    // TODO remove the free list: nodes are never removed from the list,
    //      so indices are never given back to `free` (it only ever shrinks).
    //      Also, we probably do not need one of the front and back cursors.
    free: Vec<usize>,
    /// Index of the front node, `usize::MAX` while the list is empty.
    front: usize,
    /// Index of the back node, `usize::MAX` while the list is empty.
    back: usize,
}

impl<T> FixedSizeList<T> {
    /// Creates an empty list with `capacity` slots, all of them free.
    fn new(capacity: usize) -> Self {
        Self {
            nodes: repeat_with(|| None).take(capacity).collect(),
            free: (0..capacity).collect(),
            front: usize::MAX,
            back: usize::MAX,
        }
    }

    /// Returns the total number of slots.
    #[inline]
    fn capacity(&self) -> usize {
        self.nodes.len()
    }

    /// Returns the number of slots currently in use.
    #[inline]
    fn len(&self) -> usize {
        self.nodes.len() - self.free.len()
    }

    /// Returns `true` when no slot is in use.
    #[inline]
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns `true` when every slot is in use.
    #[inline]
    fn is_full(&self) -> bool {
        self.len() == self.capacity()
    }

    /// Returns the index of the back node (`usize::MAX` while the list is empty).
    #[inline]
    fn back_idx(&self) -> usize {
        self.back
    }

    /// Reserves a free slot and returns its index, or `None` when the list is full.
    #[inline]
    fn next(&mut self) -> Option<usize> {
        self.free.pop()
    }

    /// Returns the node stored at `idx`, or `None` when `idx` is out of
    /// bounds (which includes the `usize::MAX` marker) or the slot is empty.
    #[inline]
    fn node_mut(&mut self, idx: usize) -> Option<&mut FixedSizeListNode<T>> {
        self.nodes.get_mut(idx).and_then(|node| node.as_mut())
    }

    /// Shared-reference counterpart of [`Self::node_mut`].
    #[inline]
    fn node_ref(&self, idx: usize) -> Option<&FixedSizeListNode<T>> {
        self.nodes.get(idx).and_then(|node| node.as_ref())
    }

    /// Moves the node stored at `idx` to the front of the list and returns
    /// a mutable reference to its payload.
    ///
    /// Returns `None`, leaving the list untouched, when `idx` does not
    /// designate a filled slot.
    ///
    /// Only the link indices are rewritten: the payload stays in its slot,
    /// and nothing at all is written when the node is already at the front.
    #[inline]
    fn move_front(&mut self, idx: usize) -> Option<&mut T> {
        let (prev, next) = {
            let node = self.node_ref(idx)?;
            (node.prev, node.next)
        };

        if idx != self.front {
            // Unlink the node from its current position. It is not the
            // front node, so it has a predecessor to stitch to its successor.
            if let Some(before) = self.node_mut(prev) {
                before.next = next;
            }
            if let Some(after) = self.node_mut(next) {
                after.prev = prev;
            } else {
                // The node was the back one: its predecessor takes over.
                self.back = prev;
            }

            // Link the node in front of the current front node.
            let old_front = self.front;
            if let Some(front) = self.node_mut(old_front) {
                front.prev = idx;
            }
            let node = self.node_mut(idx)?;
            node.prev = usize::MAX;
            node.next = old_front;
            self.front = idx;
        }

        self.node_mut(idx).map(|node| &mut node.data)
    }

    /// Stores `data` in a free slot, links it at the front of the list and
    /// returns the index of the slot along with a reference to the payload.
    ///
    /// Returns `None` (dropping `data`) when the list is full.
    #[inline]
    fn push_front(&mut self, data: T) -> Option<(usize, &mut T)> {
        let idx = self.next()?;
        let old_front = self.front;
        if let Some(front) = self.node_mut(old_front) {
            front.prev = idx;
        }
        // First node of the list: it is the back node as well.
        if self.node_ref(self.back).is_none() {
            self.back = idx;
        }
        let node = self
            .nodes
            .get_mut(idx)
            .expect("indices of the free list are always in bounds")
            .insert(FixedSizeListNode { prev: usize::MAX, next: old_front, data });
        self.front = idx;
        Some((idx, &mut node.data))
    }
}
|
@ -10,6 +10,7 @@ mod document_change;
|
|||||||
mod extract;
|
mod extract;
|
||||||
pub mod indexer;
|
pub mod indexer;
|
||||||
mod items_pool;
|
mod items_pool;
|
||||||
|
mod lru;
|
||||||
mod merger;
|
mod merger;
|
||||||
mod top_level_map;
|
mod top_level_map;
|
||||||
mod word_fst_builder;
|
mod word_fst_builder;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user