MeiliSearch/index-scheduler/src/lru.rs

//! Thread-safe `Vec`-backed LRU cache using [`std::sync::atomic::AtomicU64`] for synchronization.

use std::sync::atomic::{AtomicU64, Ordering};

/// Thread-safe `Vec`-backed LRU cache.
///
/// Every entry is stored next to the generation at which it was last touched.
/// Generations are handed out by a counter that only ever increases, so the entry
/// with the smallest generation is the least recently used one.
#[derive(Debug)]
pub struct Lru<T> {
    /// The cached values, each paired with the generation at which it was last touched.
    data: Vec<(AtomicU64, T)>,
    /// Source of generations: incremented every time a generation is handed out.
    generation: AtomicU64,
    /// Maximum number of entries `data` is allowed to hold.
    cap: usize,
}

impl<T> Lru<T> {
    /// Creates a new LRU cache with the specified capacity.
    ///
    /// The capacity is allocated up-front, and will never change through a [`Self::put`] operation.
    ///
    /// # Panics
    ///
    /// - If the capacity is 0.
    /// - If the capacity exceeds `isize::MAX` bytes.
    pub fn new(cap: usize) -> Self {
        assert_ne!(cap, 0, "The capacity of a cache cannot be 0");
        Self {
            // Note: since the element of the vector contains an AtomicU64, it is definitely not
            // zero-sized so cap will never be usize::MAX.
            data: Vec::with_capacity(cap),
            generation: AtomicU64::new(0),
            cap,
        }
    }

    /// The capacity of this LRU cache, that is the maximum number of elements it can hold before
    /// evicting elements from the cache.
    ///
    /// The cache will contain at most this number of elements at any given time.
    pub fn capacity(&self) -> usize {
        self.cap
    }

    /// Hands out a fresh generation through a shared reference.
    ///
    /// The counter is bumped with an atomic read-modify-write, and the value returned is the one
    /// the counter holds right after this call's own increment.
    fn next_generation(&self) -> u64 {
        // Acquire so this "happens-before" any potential store to a data cell (with Release ordering)
        let generation = self.generation.fetch_add(1, Ordering::Acquire);
        generation + 1
    }

    /// Hands out a fresh generation through an exclusive reference.
    ///
    /// Exclusive access lets the counter be treated as a plain integer, without atomic instructions.
    fn next_generation_mut(&mut self) -> u64 {
        let generation = self.generation.get_mut();
        *generation += 1;
        *generation
    }

    /// Add a value in the cache, evicting an older value if necessary.
    ///
    /// If a value was evicted from the cache, it is returned.
    ///
    /// # Complexity
    ///
    /// - If the cache is full, then linear in the capacity.
    /// - Otherwise constant.
    pub fn put(&mut self, value: T) -> Option<T> {
        // no need for a memory fence: we assume that whichever mechanism provides us synchronization
        // (very probably, a RwLock) takes care of fencing for us.

        let next_generation = self.next_generation_mut();
        // Make room first so that the push below never grows the vector past `cap`.
        let evicted = if self.is_full() { self.pop() } else { None };
        self.data.push((AtomicU64::new(next_generation), value));
        evicted
    }

    /// Evict the oldest value from the cache.
    ///
    /// If the cache is empty, `None` will be returned.
    ///
    /// # Complexity
    ///
    /// - Linear in the capacity of the cache.
    pub fn pop(&mut self) -> Option<T> {
        // `Iterator::min_by_key` only hands shared references to its key closure, so the
        // generations are first read through the exclusive references yielded by `iter_mut`.
        // This allows handling the `AtomicU64`s as normal integers without atomic instructions.
        let (oldest_index, _) = self
            .data
            .iter_mut()
            .enumerate()
            .map(|(index, (generation, _))| (index, *generation.get_mut()))
            // On equal generations the first entry wins.
            .min_by_key(|&(_, generation)| generation)?;
        // Order inside `data` is irrelevant (recency lives in the generations), so the
        // constant-time `swap_remove` is fine.
        Some(self.data.swap_remove(oldest_index).1)
    }

    /// The current number of elements in the cache.
    ///
    /// This value is guaranteed to be less than or equal to [`Self::capacity`].
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Returns `true` if the cache currently holds no element.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Returns `true` if putting any additional element in the cache would cause the eviction of an element.
    pub fn is_full(&self) -> bool {
        self.len() == self.capacity()
    }
}

/// Key-value map with least-recently-used eviction, built on top of [`Lru`].
///
/// Keys are only required to implement [`Eq`]: lookups compare the requested key against the
/// stored keys one by one, so every keyed operation is linear in the number of entries.
#[derive(Debug)]
pub struct LruMap<K, V>(Lru<(K, V)>);

impl<K, V> LruMap<K, V>
where
    K: Eq,
{
    /// Creates a new LRU cache map with the specified capacity.
    ///
    /// The capacity is allocated up-front, and will never change through a [`Self::insert`] operation.
    ///
    /// # Panics
    ///
    /// - If the capacity is 0.
    /// - If the capacity exceeds `isize::MAX` bytes.
    pub fn new(cap: usize) -> Self {
        Self(Lru::new(cap))
    }

    /// Gets a shared reference to a value in the cache map by its key, marking the entry as the
    /// most recently used one.
    ///
    /// If no value matches, `None` will be returned.
    ///
    /// # Complexity
    ///
    /// - Linear in the capacity of the cache.
    pub fn get(&self, key: &K) -> Option<&V> {
        let (generation, (_, value)) =
            self.0.data.iter().find(|(_, (candidate, _))| key == candidate)?;
        // Only a shared reference is available here, so the entry is refreshed with an atomic store.
        generation.store(self.0.next_generation(), Ordering::Release);
        Some(value)
    }

    /// Gets an exclusive reference to a value in the cache map by its key, marking the entry as
    /// the most recently used one.
    ///
    /// If no value matches, `None` will be returned.
    ///
    /// # Complexity
    ///
    /// - Linear in the capacity of the cache.
    pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
        // Drawn before the lookup, so the generation counter advances even when the key is absent.
        let next_generation = self.0.next_generation_mut();
        let (generation, (_, value)) =
            self.0.data.iter_mut().find(|(_, (candidate, _))| key == candidate)?;
        // Exclusive access: the generation can be written as a plain integer.
        *generation.get_mut() = next_generation;
        Some(value)
    }

    /// Inserts a value in the cache map by its key, replacing any existing value and returning any evicted value.
    ///
    /// - If the key is already present, its value is replaced and the previous value is returned
    ///   in [`InsertionOutcome::Replaced`].
    /// - Otherwise the pair is added; if that required evicting the least recently used pair, the
    ///   evicted pair is returned in [`InsertionOutcome::Evicted`], else
    ///   [`InsertionOutcome::InsertedNew`] is returned.
    ///
    /// # Complexity
    ///
    /// - Linear in the capacity of the cache.
    pub fn insert(&mut self, key: K, value: V) -> InsertionOutcome<K, V> {
        match self.get_mut(&key) {
            Some(slot) => InsertionOutcome::Replaced(std::mem::replace(slot, value)),
            None => match self.0.put((key, value)) {
                Some((key, value)) => InsertionOutcome::Evicted(key, value),
                None => InsertionOutcome::InsertedNew,
            },
        }
    }

    /// Removes an element from the cache map by its key, returning its value.
    ///
    /// Returns `None` if there was no element with this key in the cache.
    ///
    /// # Complexity
    ///
    /// - Linear in the capacity of the cache.
    pub fn remove(&mut self, key: &K) -> Option<V> {
        let index = self.0.data.iter().position(|(_, (candidate, _))| key == candidate)?;
        // Order inside the backing vector is irrelevant, so the constant-time `swap_remove` is fine.
        let (_, (_, value)) = self.0.data.swap_remove(index);
        Some(value)
    }
}

/// The result of an insertion in a LRU map.
///
/// Derives `Debug`, `PartialEq` and `Eq` (when `K` and `V` do) so that outcomes can be logged and
/// compared, e.g. in assertions.
#[derive(Debug, PartialEq, Eq)]
pub enum InsertionOutcome<K, V> {
    /// The key was not in the cache and the key-value pair has been inserted without evicting
    /// anything.
    InsertedNew,
    /// The key was not in the cache and an old key-value pair, carried by this variant, was
    /// evicted from the cache to make room for the insertion.
    Evicted(K, V),
    /// The key was already in the cache map and its value has been updated; the previous value is
    /// carried by this variant.
    Replaced(V),
}
Add LruMap 2023-01-11 17:30:50 +01:00			//! Thread-safe `Vec`-backend LRU cache using [`std::sync::atomic::AtomicU64`] for synchronization.

			`use std::sync::atomic::{AtomicU64, Ordering};`

			/// Thread-safe `Vec`-backend LRU cache
			`#[derive(Debug)]`
			`pub struct Lru<T> {`
			`data: Vec<(AtomicU64, T)>,`
			`generation: AtomicU64,`
			`cap: usize,`
			`}`

			`impl<T> Lru<T> {`
			`/// Creates a new LRU cache with the specified capacity.`
			`///`
			/// The capacity is allocated up-front, and will never change through a [`Self::put`] operation.
			`///`
			`/// # Panics`
			`///`
			`/// - If the capacity is 0.`
			/// - If the capacity exceeds `isize::MAX` bytes.
			`pub fn new(cap: usize) -> Self {`
			`assert_ne!(cap, 0, "The capacity of a cache cannot be 0");`
			`Self {`
			`// Note: since the element of the vector contains an AtomicU64, it is definitely not zero-sized so cap will never be usize::MAX.`
			`data: Vec::with_capacity(cap),`
			`generation: AtomicU64::new(0),`
			`cap,`
			`}`
			`}`

			`/// The capacity of this LRU cache, that is the maximum number of elements it can hold before evicting elements from the cache.`
			`///`
			`/// The cache will contain at most this number of elements at any given time.`
			`pub fn capacity(&self) -> usize {`
			`self.cap`
			`}`

			`fn next_generation(&self) -> u64 {`
			`// Acquire so this "happens-before" any potential store to a data cell (with Release ordering)`
			`let generation = self.generation.fetch_add(1, Ordering::Acquire);`
			`generation + 1`
			`}`

			`fn next_generation_mut(&mut self) -> u64 {`
			`let generation = self.generation.get_mut();`
			`*generation += 1;`
			`*generation`
			`}`

			`/// Add a value in the cache, evicting an older value if necessary.`
			`///`
			`/// If a value was evicted from the cache, it is returned.`
			`///`
			`/// # Complexity`
			`///`
			`/// - If the cache is full, then linear in the capacity.`
			`/// - Otherwise constant.`
			`pub fn put(&mut self, value: T) -> Option<T> {`
			`// no need for a memory fence: we assume that whichever mechanism provides us synchronization`
			`// (very probably, a RwLock) takes care of fencing for us.`

			`let next_generation = self.next_generation_mut();`
			`let evicted = if self.is_full() { self.pop() } else { None };`
			`self.data.push((AtomicU64::new(next_generation), value));`
			`evicted`
			`}`

			`/// Evict the oldest value from the cache.`
			`///`
			/// If the cache is empty, `None` will be returned.
			`///`
			`/// # Complexity`
			`///`
			`/// - Linear in the capacity of the cache.`
			`pub fn pop(&mut self) -> Option<T> {`
			// Don't use `Iterator::min_by_key` that provides shared references to its elements,
			`// so that we can get an exclusive one.`
			// This allows to handles the `AtomicU64`s as normal integers without using atomic instructions.
			`let mut min_generation_index = None;`
			`for (index, (generation, _)) in self.data.iter_mut().enumerate() {`
			`let generation = *generation.get_mut();`
			`if let Some((_, min_generation)) = min_generation_index {`
			`if min_generation > generation {`
			`min_generation_index = Some((index, generation));`
			`}`
			`} else {`
			`min_generation_index = Some((index, generation))`
			`}`
			`}`
			`min_generation_index.map(\|(min_index, _)\| self.data.swap_remove(min_index).1)`
			`}`

			`/// The current number of elements in the cache.`
			`///`
			/// This value is guaranteed to be less than or equal to [`Self::capacity`].
			`pub fn len(&self) -> usize {`
			`self.data.len()`
			`}`

			/// Returns `true` if putting any additional element in the cache would cause the eviction of an element.
			`pub fn is_full(&self) -> bool {`
			`self.len() == self.capacity()`
			`}`
			`}`

			`pub struct LruMap<K, V>(Lru<(K, V)>);`

			`impl<K, V> LruMap<K, V>`
			`where`
			`K: Eq,`
			`{`
			`/// Creates a new LRU cache map with the specified capacity.`
			`///`
			/// The capacity is allocated up-front, and will never change through a [`Self::insert`] operation.
			`///`
			`/// # Panics`
			`///`
			`/// - If the capacity is 0.`
			/// - If the capacity exceeds `isize::MAX` bytes.
			`pub fn new(cap: usize) -> Self {`
			`Self(Lru::new(cap))`
			`}`

			`/// Gets a value in the cache map by its key.`
			`///`
			/// If no value matches, `None` will be returned.
			`///`
			`/// # Complexity`
			`///`
			`/// - Linear in the capacity of the cache.`
			`pub fn get(&self, key: &K) -> Option<&V> {`
			`for (generation, (candidate, value)) in self.0.data.iter() {`
			`if key == candidate {`
			`generation.store(self.0.next_generation(), Ordering::Release);`
			`return Some(value);`
			`}`
			`}`
			`None`
			`}`

			`/// Gets a value in the cache map by its key.`
			`///`
			/// If no value matches, `None` will be returned.
			`///`
			`/// # Complexity`
			`///`
			`/// - Linear in the capacity of the cache.`
			`pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {`
			`let next_generation = self.0.next_generation_mut();`
			`for (generation, (candidate, value)) in self.0.data.iter_mut() {`
			`if key == candidate {`
			`*generation.get_mut() = next_generation;`
			`return Some(value);`
			`}`
			`}`
			`None`
			`}`

			`/// Inserts a value in the cache map by its key, replacing any existing value and returning any evicted value.`
			`///`
			`/// # Complexity`
			`///`
			`/// - Linear in the capacity of the cache.`
			`pub fn insert(&mut self, key: K, mut value: V) -> InsertionOutcome<K, V> {`
			`match self.get_mut(&key) {`
			`Some(old_value) => {`
			`std::mem::swap(old_value, &mut value);`
			`InsertionOutcome::Replaced(value)`
			`}`
			`None => match self.0.put((key, value)) {`
			`Some((key, value)) => InsertionOutcome::Evicted(key, value),`
			`None => InsertionOutcome::InsertedNew,`
			`},`
			`}`
			`}`

			`/// Removes an element from the cache map by its key, returning its value.`
			`///`
			/// Returns `None` if there was no element with this key in the cache.
			`///`
			`/// # Complexity`
			`///`
			`/// - Linear in the capacity of the cache.`
			`pub fn remove(&mut self, key: &K) -> Option<V> {`
			`for (index, (_, (candidate, _))) in self.0.data.iter_mut().enumerate() {`
			`if key == candidate {`
			`return Some(self.0.data.swap_remove(index).1 .1);`
			`}`
			`}`
			`None`
			`}`
			`}`

			`/// The result of an insertion in a LRU map.`
			`pub enum InsertionOutcome<K, V> {`
			`/// The key was not in the cache, the key-value pair has been inserted.`
			`InsertedNew,`
			`/// The key was not in the cache and an old key-value pair was evicted from the cache to make room for its insertions.`
			`Evicted(K, V),`
			`/// The key was already in the cache map, its value has been updated.`
			`Replaced(V),`
			`}`