From fc4013a43fdbb5b8dc9de8de6b9f801ed37f4122 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 1 Jul 2020 10:35:07 +0200 Subject: [PATCH] Fix the ARC cache --- src/bin/indexer.rs | 7 ++++--- src/cache.rs | 42 ++++++------------------------------------ 2 files changed, 10 insertions(+), 39 deletions(-) diff --git a/src/bin/indexer.rs b/src/bin/indexer.rs index d3bf30793..cc0604ea4 100644 --- a/src/bin/indexer.rs +++ b/src/bin/indexer.rs @@ -2,6 +2,7 @@ use std::collections::hash_map::Entry; use std::collections::{HashMap, BTreeSet}; use std::convert::{TryFrom, TryInto}; use std::io; +use std::ops::BitOr; use std::path::PathBuf; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -74,7 +75,7 @@ fn index_csv(wtxn: &mut heed::RwTxn, mut rdr: csv::Reader, index None => { let mut ids = index.word_positions.get(wtxn, &word)?.unwrap_or_default(); ids.insert(position); - for (word, ids) in word_positions.insert(word.clone(), ids) { + for (word, ids) in word_positions.insert(word.clone(), ids, RoaringBitmap::bitor) { index.word_positions.put(wtxn, &word, &ids)?; } } @@ -90,7 +91,7 @@ fn index_csv(wtxn: &mut heed::RwTxn, mut rdr: csv::Reader, index None => { let mut ids = index.word_position_docids.get(wtxn, &key)?.unwrap_or_default(); ids.insert(position); - for ((word, position), ids) in word_position_docids.insert((word.clone(), position), ids) { + for ((word, position), ids) in word_position_docids.insert((word.clone(), position), ids, RoaringBitmap::bitor) { let mut key = word.as_bytes().to_vec(); key.extend_from_slice(&position.to_be_bytes()); index.word_position_docids.put(wtxn, &key, &ids)?; @@ -123,7 +124,7 @@ fn index_csv(wtxn: &mut heed::RwTxn, mut rdr: csv::Reader, index let iter = index.word_positions.as_polymorph().iter::<_, Str, DecodeIgnore>(wtxn)?; for result in iter { let (word, ()) = result?; - new_words.insert(word.clone()); + new_words.insert(word); } let new_words_fst = fst::Set::from_iter(new_words)?; diff --git a/src/cache.rs b/src/cache.rs index d90aa4294..ddff719fa 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -34,25 +34,6 @@ impl PartialEq for KeyRef { impl Eq for KeyRef {} -#[cfg(feature = "nightly")] -#[doc(hidden)] -pub auto trait NotKeyRef {} - -#[cfg(feature = "nightly")] -impl !NotKeyRef for KeyRef {} - -#[cfg(feature = "nightly")] -impl Borrow for KeyRef -where - K: Borrow, - D: NotKeyRef + ?Sized, -{ - fn borrow(&self) -> &D { - unsafe { &*self.k }.borrow() - } -} - -#[cfg(not(feature = "nightly"))] impl Borrow for KeyRef { fn borrow(&self) -> &K { unsafe { &*self.k } @@ -88,7 +69,7 @@ impl LruEntry { } } -/// An LRU Cache +/// An LRU Cache. pub struct LruCache { map: FastMap8, Box>>, cap: usize, @@ -100,13 +81,6 @@ pub struct LruCache { impl LruCache { /// Creates a new LRU Cache that holds at most `cap` items. - /// - /// # Example - /// - /// ``` - /// use lru::LruCache; - /// let mut cache: LruCache = LruCache::new(10); - /// ``` pub fn new(cap: usize) -> LruCache { let mut map = FastMap8::default(); map.reserve(cap); @@ -114,13 +88,6 @@ impl LruCache { } /// Creates a new LRU Cache that never automatically evicts items. - /// - /// # Example - /// - /// ``` - /// use lru::LruCache; - /// let mut cache: LruCache = LruCache::unbounded(); - /// ``` pub fn unbounded() -> LruCache { LruCache::construct(usize::MAX, HashMap::default()) } @@ -443,13 +410,16 @@ where } } - pub fn insert(&mut self, key: K, value: V) -> Vec<(K, V)> { + pub fn insert(&mut self, key: K, value: V, mut merge: F) -> Vec<(K, V)> + where F: FnMut(V, V) -> V + { let mut evicted = Vec::new(); if self.frequent_set.contains_key(&key) { evicted.extend(self.frequent_set.insert(key, value)); return evicted; } - if self.recent_set.remove(&key).is_some() { + if let Some(prev_value) = self.recent_set.remove(&key) { + let value = (merge)(prev_value, value); evicted.extend(self.frequent_set.insert(key, value)); return evicted; }