Mirror of https://github.com/meilisearch/MeiliSearch, synced 2024-11-29 16:24:26 +01:00
Disable sled logging
This commit is contained in:
parent ca332883cc
commit 4d92df1b95
Cargo.lock (generated): 64 changed lines
@@ -1879,16 +1879,6 @@ dependencies = [
"percent-encoding",
]

[[package]]
name = "fs2"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"
dependencies = [
"libc",
"winapi",
]

[[package]]
name = "fst"
version = "0.4.7"

@@ -3404,7 +3394,7 @@ dependencies = [
"obkv",
"once_cell",
"ordered-float",
"parking_lot 0.12.3",
"parking_lot",
"permissive-json-pointer",
"pin-project-lite",
"platform-dirs",

@@ -3577,7 +3567,6 @@ dependencies = [
"rstar",
"serde",
"serde_json",
"sled",
"slice-group-by",
"smallstr",
"smallvec",

@@ -3891,17 +3880,6 @@ dependencies = [
"winapi",
]

[[package]]
name = "parking_lot"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
dependencies = [
"instant",
"lock_api",
"parking_lot_core 0.8.6",
]

[[package]]
name = "parking_lot"
version = "0.12.3"

@@ -3909,21 +3887,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
dependencies = [
"lock_api",
"parking_lot_core 0.9.8",
]

[[package]]
name = "parking_lot_core"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc"
dependencies = [
"cfg-if",
"instant",
"libc",
"redox_syscall 0.2.16",
"smallvec",
"winapi",
"parking_lot_core",
]

[[package]]

@@ -4265,7 +4229,7 @@ dependencies = [
"lazy_static",
"libc",
"memchr",
"parking_lot 0.12.3",
"parking_lot",
"procfs",
"protobuf",
"thiserror",

@@ -5023,22 +4987,6 @@ dependencies = [
"autocfg",
]

[[package]]
name = "sled"
version = "0.34.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935"
dependencies = [
"crc32fast",
"crossbeam-epoch",
"crossbeam-utils",
"fs2",
"fxhash",
"libc",
"log",
"parking_lot 0.11.2",
]

[[package]]
name = "slice-group-by"
version = "0.3.1"

@@ -5300,7 +5248,7 @@ version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96374855068f47402c3121c6eed88d29cb1de8f3ab27090e273e420bdabcf050"
dependencies = [
"parking_lot 0.12.3",
"parking_lot",
]

[[package]]

@@ -5374,7 +5322,7 @@ dependencies = [
"bstr",
"fancy-regex 0.12.0",
"lazy_static",
"parking_lot 0.12.3",
"parking_lot",
"rustc-hash",
]

@@ -5486,7 +5434,7 @@ dependencies = [
"libc",
"mio",
"num_cpus",
"parking_lot 0.12.3",
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
"socket2 0.5.5",

@@ -67,8 +67,6 @@ filter-parser = { path = "../filter-parser" }
# documents words self-join
itertools = "0.13.0"

sled = "0.34.7"

csv = "1.3.0"
candle-core = { version = "0.6.0" }
candle-transformers = { version = "0.6.0" }

@@ -1,10 +1,8 @@
use std::borrow::{Borrow, Cow};
use std::hash::Hash;
use std::iter::Chain;
use std::borrow::Cow;
use std::mem;
use std::num::NonZeroUsize;

use lru::{IntoIter, LruCache};
use lru::LruCache;
use roaring::RoaringBitmap;
use smallvec::SmallVec;

@@ -15,27 +13,18 @@ const ENABLED: bool = true;

pub struct SorterCacheDelAddCboRoaringBitmap<const N: usize, MF> {
cache: LruCache<SmallVec<[u8; N]>, DelAddRoaringBitmap>,
prefix: &'static [u8; 3],
sorter: grenad::Sorter<MF>,
deladd_buffer: Vec<u8>,
cbo_buffer: Vec<u8>,
conn: sled::Db,
}

impl<const N: usize, MF> SorterCacheDelAddCboRoaringBitmap<N, MF> {
pub fn new(
cap: NonZeroUsize,
sorter: grenad::Sorter<MF>,
prefix: &'static [u8; 3],
conn: sled::Db,
) -> Self {
pub fn new(cap: NonZeroUsize, sorter: grenad::Sorter<MF>) -> Self {
SorterCacheDelAddCboRoaringBitmap {
cache: LruCache::new(cap),
prefix,
sorter,
deladd_buffer: Vec::new(),
cbo_buffer: Vec::new(),
conn,
}
}
}

@@ -56,7 +45,7 @@ where
}
None => {
let value = DelAddRoaringBitmap::new_del_u32(n);
for (key, deladd) in self.cache.push(key.into(), value) {
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
self.write_entry_to_sorter(key, deladd)?;
}
}

@@ -81,7 +70,7 @@ where
}
None => {
let value = DelAddRoaringBitmap::new_del(bitmap);
for (key, deladd) in self.cache.push(key.into(), value) {
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
self.write_entry_to_sorter(key, deladd)?;
}
}

@@ -102,7 +91,7 @@ where
}
None => {
let value = DelAddRoaringBitmap::new_add_u32(n);
for (key, deladd) in self.cache.push(key.into(), value) {
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
self.write_entry_to_sorter(key, deladd)?;
}
}

@@ -127,7 +116,7 @@ where
}
None => {
let value = DelAddRoaringBitmap::new_add(bitmap);
for (key, deladd) in self.cache.push(key.into(), value) {
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
self.write_entry_to_sorter(key, deladd)?;
}
}

@@ -149,7 +138,7 @@ where
}
None => {
let value = DelAddRoaringBitmap::new_del_add_u32(n);
for (key, deladd) in self.cache.push(key.into(), value) {
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
self.write_entry_to_sorter(key, deladd)?;
}
}

@@ -187,18 +176,10 @@ where
}
DelAddRoaringBitmap { del: None, add: None } => return Ok(()),
}
self.cbo_buffer.clear();
self.cbo_buffer.extend_from_slice(self.prefix);
self.cbo_buffer.extend_from_slice(key.as_ref());
self.conn.merge(&self.cbo_buffer, 1u32.to_ne_bytes()).unwrap();
self.sorter.insert(key, value_writer.into_inner().unwrap())
}

pub fn direct_insert(&mut self, key: &[u8], val: &[u8]) -> Result<(), grenad::Error<U>> {
self.cbo_buffer.clear();
self.cbo_buffer.extend_from_slice(self.prefix);
self.cbo_buffer.extend_from_slice(key);
self.conn.merge(&self.cbo_buffer, 1u32.to_ne_bytes()).unwrap();
self.sorter.insert(key, val)
}

@@ -240,167 +221,3 @@ impl DelAddRoaringBitmap {
DelAddRoaringBitmap { del: None, add: Some(RoaringBitmap::from([n])) }
}
}

// TODO support custom State (3rd param S of LruCache)
pub struct ArcCache<K, V> {
recent_set: LruCache<K, V>,
recent_evicted: LruCache<K, ()>,
frequent_set: LruCache<K, V>,
frequent_evicted: LruCache<K, ()>,
capacity: NonZeroUsize,
p: usize,
}

impl<K: Eq + Hash, V> ArcCache<K, V> {
pub fn new(cap: NonZeroUsize) -> Self {
ArcCache {
recent_set: LruCache::new(cap),
recent_evicted: LruCache::new(cap),
frequent_set: LruCache::new(cap),
frequent_evicted: LruCache::new(cap),
capacity: cap,
p: 0,
}
}
}

impl<K: Eq + Hash + Clone, V> ArcCache<K, V> {
fn get_mut<Q>(&mut self, k: &Q) -> (Option<&mut V>, Option<(K, V)>)
where
K: Borrow<Q>,
Q: Hash + Eq + ?Sized,
{
if let Some((key, value)) = self.recent_set.pop_entry(k) {
let evicted = self.frequent_set.push(key, value);
(self.frequent_set.get_mut(k), evicted)
} else {
(self.frequent_set.get_mut(k), None)
}
}

fn push(&mut self, key: K, value: V) -> Vec<(K, V)> {
let mut evicted = Vec::new();

if self.recent_set.contains(&key) {
if let Some(evicted_entry) = self.recent_set.pop_entry(&key) {
evicted.push(evicted_entry);
}
if let Some(evicted_entry) = self.frequent_set.push(key, value) {
evicted.push(evicted_entry);
}
return evicted;
}

if self.frequent_set.contains(&key) {
if let Some(evicted_entry) = self.frequent_set.push(key, value) {
evicted.push(evicted_entry);
}
return evicted;
}

if self.recent_set.len() + self.frequent_set.len() == self.capacity.get() {
if self.recent_set.len() < self.capacity.get() {
if self.recent_set.len() + self.recent_evicted.len() == self.capacity.get() {
self.recent_evicted.pop_lru();
}
if let Some((lru_key, lru_value)) = self.frequent_set.pop_lru() {
self.frequent_evicted.put(lru_key.clone(), ());
evicted.push((lru_key, lru_value));
}
} else if let Some((lru_key, lru_value)) = self.recent_set.pop_lru() {
self.recent_evicted.put(lru_key.clone(), ());
evicted.push((lru_key, lru_value));
}
}

if self.recent_evicted.contains(&key) {
let delta = if self.recent_evicted.len() >= self.frequent_evicted.len() {
1
} else {
self.frequent_evicted.len() / self.recent_evicted.len()
};

self.p = (self.p + delta).min(self.capacity.get());
if let Some(evicted_entry) = self.replace(&key) {
evicted.push(evicted_entry);
}
self.recent_evicted.pop(&key);
if let Some(evicted_entry) = self.frequent_set.push(key, value) {
evicted.push(evicted_entry);
}
} else if self.frequent_evicted.contains(&key) {
let delta = if self.frequent_evicted.len() >= self.recent_evicted.len() {
1
} else {
self.recent_evicted.len() / self.frequent_evicted.len()
};
self.p = self.p.saturating_sub(delta);
if let Some(evicted_entry) = self.replace(&key) {
evicted.push(evicted_entry);
}
self.frequent_evicted.pop(&key);
if let Some(evicted_entry) = self.frequent_set.push(key, value) {
evicted.push(evicted_entry);
}
} else {
if self.recent_set.len() + self.recent_evicted.len() == self.capacity.get() {
if self.recent_set.len() < self.capacity.get() {
self.recent_evicted.pop_lru();
if let Some(evicted_entry) = self.replace(&key) {
evicted.push(evicted_entry);
}
} else if let Some(evicted_entry) = self.recent_set.pop_lru() {
evicted.push(evicted_entry);
}
} else if self.recent_set.len()
+ self.frequent_set.len()
+ self.recent_evicted.len()
+ self.frequent_evicted.len()
>= self.capacity.get()
{
if self.recent_set.len()
+ self.frequent_set.len()
+ self.recent_evicted.len()
+ self.frequent_evicted.len()
== 2 * self.capacity.get()
{
self.frequent_evicted.pop_lru();
}
if let Some(evicted_entry) = self.replace(&key) {
evicted.push(evicted_entry);
}
}
if let Some(evicted_entry) = self.recent_set.push(key, value) {
evicted.push(evicted_entry);
}
}

evicted
}

fn replace(&mut self, key: &K) -> Option<(K, V)> {
if !self.recent_set.is_empty()
&& (self.recent_set.len() > self.p
|| (self.frequent_evicted.contains(key) && self.recent_set.len() == self.p))
{
if let Some((lru_key, lru_value)) = self.recent_set.pop_lru() {
self.recent_evicted.put(lru_key.clone(), ());
return Some((lru_key, lru_value));
}
} else if let Some((lru_key, lru_value)) = self.frequent_set.pop_lru() {
self.frequent_evicted.put(lru_key.clone(), ());
return Some((lru_key, lru_value));
}

None
}
}

impl<K: Hash + Eq, V> IntoIterator for ArcCache<K, V> {
type Item = (K, V);
type IntoIter = Chain<IntoIter<K, V>, IntoIter<K, V>>;

fn into_iter(self) -> Self::IntoIter {
self.recent_set.into_iter().chain(self.frequent_set)
}
}
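For context on the `for (key, deladd) in self.cache.push(...)` to `if let Some((key, deladd)) = self.cache.push(...)` change above: with the `lru` crate, `LruCache::push` returns the entry it displaced, if any, and the sorter cache flushes that entry to the grenad sorter. The old `for` loop compiled only because `Option` implements `IntoIterator`; the `if let` form says the same thing directly. A minimal standalone sketch of that eviction-handling pattern (plain Rust, not part of this diff; the key and value types here are illustrative):

```rust
use std::num::NonZeroUsize;

use lru::LruCache;

fn main() {
    // Capacity of 2: pushing a third distinct key evicts the least recently used entry.
    let mut cache: LruCache<String, u32> = LruCache::new(NonZeroUsize::new(2).unwrap());
    let mut flushed: Vec<(String, u32)> = Vec::new();

    for (i, word) in ["hello", "world", "sled"].iter().enumerate() {
        // `push` returns `Some((key, value))` when an entry is evicted or replaced.
        if let Some((key, value)) = cache.push(word.to_string(), i as u32) {
            // Stand-in for `write_entry_to_sorter(key, deladd)` in the hunks above.
            flushed.push((key, value));
        }
    }

    assert_eq!(flushed, vec![("hello".to_string(), 0)]);
}
```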
@@ -29,8 +29,6 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
settings_diff: &InnerIndexSettingsDiff,
max_positions_per_attributes: Option<u32>,
) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
let conn = super::SLED_DB.clone();

let max_positions_per_attributes = max_positions_per_attributes
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
let max_memory = indexer.max_memory_by_thread();

@@ -152,7 +150,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
key_buffer.extend_from_slice(&field_id.to_be_bytes());
let mut key = b"dwp".to_vec();
key.extend_from_slice(&key_buffer);
conn.merge(key, 1u32.to_ne_bytes()).unwrap();
// conn.merge(key, 1u32.to_ne_bytes()).unwrap();
docid_word_positions_sorter.insert(&key_buffer, value)?;
}

@@ -40,8 +40,6 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
facet_number_docids_sorter,
b"fnd",
super::SLED_DB.clone(),
);

let mut cursor = fid_docid_facet_number.into_cursor()?;

@@ -10,7 +10,6 @@ use heed::types::SerdeJson;
use heed::BytesEncode;

use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
use super::SLED_DB;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::heed_codec::{BEU16StrCodec, StrRefCodec};
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};

@@ -32,7 +31,6 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters,
_settings_diff: &InnerIndexSettingsDiff,
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
let conn = SLED_DB.clone();
let max_memory = indexer.max_memory_by_thread();
let options = NormalizerOption { lossy: true, ..Default::default() };

@@ -48,8 +46,6 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
facet_string_docids_sorter,
b"fsd",
SLED_DB.clone(),
);

let mut normalized_facet_string_docids_sorter = create_sorter(

@@ -108,7 +104,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
let key_bytes = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
let mut key = b"nfs".to_vec();
key.extend_from_slice(&key_bytes);
conn.merge(key, 1u32.to_ne_bytes()).unwrap();
// conn.merge(key, 1u32.to_ne_bytes()).unwrap();
normalized_facet_string_docids_sorter.insert(key_bytes, &buffer)?;
}

@@ -46,7 +46,6 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
) -> Result<ExtractedFacetValues> {
let mut conn = super::SLED_DB.clone();
let max_memory = indexer.max_memory_by_thread();

let mut fid_docid_facet_numbers_sorter = create_sorter(

@@ -170,22 +169,20 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
add_value.map(|value| extract_facet_values(&value, add_geo_support));

// Those closures are just here to simplify things a bit.
let mut insert_numbers_diff = |del_numbers, add_numbers, conn| {
let mut insert_numbers_diff = |del_numbers, add_numbers| {
insert_numbers_diff(
&mut fid_docid_facet_numbers_sorter,
&mut numbers_key_buffer,
del_numbers,
add_numbers,
conn,
)
};
let mut insert_strings_diff = |del_strings, add_strings, conn| {
let mut insert_strings_diff = |del_strings, add_strings| {
insert_strings_diff(
&mut fid_docid_facet_strings_sorter,
&mut strings_key_buffer,
del_strings,
add_strings,
conn,
)
};

@@ -199,8 +196,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
del_is_empty.insert(document);
}
Values { numbers, strings } => {
insert_numbers_diff(numbers, vec![], &mut conn)?;
insert_strings_diff(strings, vec![], &mut conn)?;
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
},
(None, Some(add_filterable_values)) => match add_filterable_values {

@@ -211,8 +208,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
add_is_empty.insert(document);
}
Values { numbers, strings } => {
insert_numbers_diff(vec![], numbers, &mut conn)?;
insert_strings_diff(vec![], strings, &mut conn)?;
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
}
},
(Some(del_filterable_values), Some(add_filterable_values)) => {

@@ -227,31 +224,31 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
add_is_null.insert(document);
}
(Null, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers, &mut conn)?;
insert_strings_diff(vec![], strings, &mut conn)?;
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
del_is_null.insert(document);
}
(Empty, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers, &mut conn)?;
insert_strings_diff(vec![], strings, &mut conn)?;
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
del_is_empty.insert(document);
}
(Values { numbers, strings }, Null) => {
add_is_null.insert(document);
insert_numbers_diff(numbers, vec![], &mut conn)?;
insert_strings_diff(strings, vec![], &mut conn)?;
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
(Values { numbers, strings }, Empty) => {
add_is_empty.insert(document);
insert_numbers_diff(numbers, vec![], &mut conn)?;
insert_strings_diff(strings, vec![], &mut conn)?;
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
(
Values { numbers: del_numbers, strings: del_strings },
Values { numbers: add_numbers, strings: add_strings },
) => {
insert_numbers_diff(del_numbers, add_numbers, &mut conn)?;
insert_strings_diff(del_strings, add_strings, &mut conn)?;
insert_numbers_diff(del_numbers, add_numbers)?;
insert_strings_diff(del_strings, add_strings)?;
}
}
}

@@ -334,7 +331,6 @@ fn insert_numbers_diff<MF>(
key_buffer: &mut Vec<u8>,
mut del_numbers: Vec<f64>,
mut add_numbers: Vec<f64>,
conn: &mut sled::Db,
) -> Result<()>
where
MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,

@@ -366,9 +362,6 @@ where
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, bytes_of(&()))?;
let bytes = obkv.into_inner()?;
let mut key = b"dfn".to_vec();
key.extend_from_slice(key_buffer);
conn.merge(key, 1u32.to_ne_bytes()).unwrap();
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
}
}

@@ -382,9 +375,6 @@ where
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, bytes_of(&()))?;
let bytes = obkv.into_inner()?;
let mut key = b"dfn".to_vec();
key.extend_from_slice(key_buffer);
conn.merge(key, 1u32.to_ne_bytes()).unwrap();
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
}
}

@@ -401,7 +391,6 @@ fn insert_strings_diff<MF>(
key_buffer: &mut Vec<u8>,
mut del_strings: Vec<(String, String)>,
mut add_strings: Vec<(String, String)>,
conn: &mut sled::Db,
) -> Result<()>
where
MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,

@@ -430,9 +419,6 @@ where
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, original)?;
let bytes = obkv.into_inner()?;
let mut key = b"dfs".to_vec();
key.extend_from_slice(key_buffer);
conn.merge(key, 1u32.to_ne_bytes()).unwrap();
fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
}
EitherOrBoth::Right((normalized, original)) => {

@@ -442,9 +428,6 @@ where
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, original)?;
let bytes = obkv.into_inner()?;
let mut key = b"dfs".to_vec();
key.extend_from_slice(key_buffer);
conn.merge(key, 1u32.to_ne_bytes()).unwrap();
fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
}
}
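A note on the `conn` argument dropped from the two closures above: a single `&mut` value cannot be captured by two closures at the same time, which is presumably why the old code threaded the sled handle through as an extra closure parameter instead of capturing it. A tiny illustrative sketch of that borrow pattern (names are made up here, not the milli ones):

```rust
fn main() {
    let mut stats: Vec<String> = Vec::new();

    // Both closures need mutable access to `stats`, so it is passed per call;
    // capturing `&mut stats` in both closures at once would not compile.
    let log_number = |n: i64, stats: &mut Vec<String>| stats.push(format!("number: {n}"));
    let log_string = |s: &str, stats: &mut Vec<String>| stats.push(format!("string: {s}"));

    log_number(1, &mut stats);
    log_string("abc", &mut stats);
    log_number(2, &mut stats);

    assert_eq!(stats.len(), 3);
}
```

Once the sled counter is gone, each closure only captures its own sorter and key buffer, so the extra parameter is no longer needed.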
@@ -43,8 +43,6 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
fid_word_count_docids_sorter,
b"fwc",
super::SLED_DB.clone(),
);

let mut key_buffer = Vec::new();

@@ -11,7 +11,6 @@ use super::helpers::{
create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, try_split_array_at,
writer_into_reader, GrenadParameters,
};
use super::SLED_DB;
use crate::error::SerializationError;
use crate::heed_codec::StrBEU16Codec;
use crate::index::db_name::DOCID_WORD_POSITIONS;

@@ -52,8 +51,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut cached_word_fid_docids_sorter = SorterCacheDelAddCboRoaringBitmap::<20, _>::new(
NonZeroUsize::new(1000).unwrap(),
word_fid_docids_sorter,
b"wfd",
SLED_DB.clone(),
);

let mut key_buffer = Vec::new();

@@ -113,8 +110,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut cached_word_docids_sorter = SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
word_docids_sorter,
b"wdi",
SLED_DB.clone(),
);

let exact_word_docids_sorter = create_sorter(

@@ -128,8 +123,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut cached_exact_word_docids_sorter = SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
exact_word_docids_sorter,
b"ewd",
SLED_DB.clone(),
);

let mut iter = cached_word_fid_docids_sorter.into_sorter()?.into_stream_merger_iter()?;

@@ -221,7 +214,6 @@ fn docids_into_writers<W>(
deletions: &RoaringBitmap,
additions: &RoaringBitmap,
writer: &mut grenad::Writer<W>,
conn: &mut sled::Db,
) -> Result<()>
where
W: std::io::Write,

@@ -253,9 +245,6 @@ where
}

// insert everything in the same writer.
let mut key = b"wod".to_vec();
key.extend_from_slice(word.as_bytes());
conn.merge(key, 1u32.to_ne_bytes()).unwrap();
writer.insert(word.as_bytes(), obkv.into_inner().unwrap())?;

Ok(())

@@ -55,8 +55,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
sorter,
b"wpp",
super::SLED_DB.clone(),
)
})
.collect();

@@ -41,8 +41,6 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
word_position_docids_sorter,
b"wpd",
super::SLED_DB.clone(),
);

let mut del_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new();

@@ -35,23 +35,6 @@ use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::error::PossibleEmbeddingMistakes;
use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};

pub static SLED_DB: once_cell::sync::Lazy<sled::Db> = once_cell::sync::Lazy::new(|| {
fn increment_u32(
_key: &[u8],
old_value: Option<&[u8]>,
merged_bytes: &[u8],
) -> Option<Vec<u8>> {
let current_count = old_value.map_or(0, |b| b.try_into().map(u32::from_ne_bytes).unwrap());
let new_count = merged_bytes.try_into().map(u32::from_ne_bytes).unwrap();
let count = current_count.saturating_add(new_count).to_ne_bytes();
Some(count.to_vec())
}

let db = sled::open("write-stats.sled").unwrap();
db.set_merge_operator(increment_u32);
db
});

/// Extract data for each databases from obkv documents in parallel.
/// Send data in grenad file over provided Sender.
#[allow(clippy::too_many_arguments)]
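The removed SLED_DB static above is a write-statistics counter: each extractor merges 1u32 under a prefixed key and sled's merge operator sums the values, so every key ends up holding the number of times it was written. A minimal sketch of that counting scheme in isolation (assumes sled 0.34; the example key below is illustrative, while the real code prepends a three-byte prefix such as b"wdi" to the entry key):

```rust
// Merge operator: add the incoming u32 to the stored u32, mirroring the
// removed `increment_u32` above.
fn increment_u32(_key: &[u8], old: Option<&[u8]>, merged: &[u8]) -> Option<Vec<u8>> {
    let current = old.map_or(0, |b| u32::from_ne_bytes(b.try_into().unwrap()));
    let delta = u32::from_ne_bytes(merged.try_into().unwrap());
    Some(current.saturating_add(delta).to_ne_bytes().to_vec())
}

fn main() -> sled::Result<()> {
    let db = sled::open("write-stats.sled")?;
    db.set_merge_operator(increment_u32);

    // Every write site calls `merge` with 1, so the value becomes a per-key write count.
    db.merge(b"wdi-hello", 1u32.to_ne_bytes())?;
    db.merge(b"wdi-hello", 1u32.to_ne_bytes())?;

    let count = db
        .get(b"wdi-hello")?
        .map(|v| u32::from_ne_bytes(v.as_ref().try_into().unwrap()))
        .unwrap_or(0);
    println!("wdi-hello was written {count} times");
    Ok(())
}
```

Native-endian byte order is fine here because the same process writes and reads the counters.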
@@ -14,7 +14,6 @@ use std::result::Result as StdResult;
use std::sync::Arc;

use crossbeam_channel::{Receiver, Sender};
pub use extract::SLED_DB;
use grenad::{Merger, MergerBuilder};
use heed::types::Str;
use heed::Database;

@@ -6,7 +6,6 @@ use heed::types::Str;
use heed::Database;

use super::index_documents::cache::SorterCacheDelAddCboRoaringBitmap;
use super::index_documents::SLED_DB;
use crate::update::del_add::deladd_serialize_add_side;
use crate::update::index_documents::{
create_sorter, merge_deladd_cbo_roaring_bitmaps,

@@ -68,8 +67,6 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
let mut cached_prefix_docids_sorter = SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
prefix_docids_sorter,
b"pdi",
SLED_DB.clone(),
);

if !common_prefix_fst_words.is_empty() {

@@ -15,7 +15,7 @@ use crate::update::index_documents::cache::SorterCacheDelAddCboRoaringBitmap;
use crate::update::index_documents::{
create_sorter, merge_deladd_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
write_sorter_into_database, CursorClonableMmap, MergeFn, SLED_DB,
write_sorter_into_database, CursorClonableMmap, MergeFn,
};
use crate::{CboRoaringBitmapCodec, Result};

@@ -73,8 +73,6 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
prefix_integer_docids_sorter,
b"pid",
SLED_DB.clone(),
);

if !common_prefix_fst_words.is_empty() {