mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-10 13:34:30 +01:00
Merge #5145
5145: Use bumparaw-collections in Meilisearch/milli r=dureuill a=Kerollmops

This PR is related to #5078. It uses the now-published bumparaw-collections and (soon) makes the `RawMap` hasher nonrandom.

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
commit
e974be9518
33
Cargo.lock
generated
33
Cargo.lock
generated
@ -706,6 +706,20 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bumparaw-collections"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ce682bdc86c2e25ef5cd95881d9d6a1902214eddf74cf9ffea88fe1464377e8"
|
||||
dependencies = [
|
||||
"allocator-api2",
|
||||
"bitpacking",
|
||||
"bumpalo",
|
||||
"hashbrown 0.15.1",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "byte-unit"
|
||||
version = "5.1.4"
|
||||
@ -2617,6 +2631,7 @@ dependencies = [
|
||||
"big_s",
|
||||
"bincode",
|
||||
"bumpalo",
|
||||
"bumparaw-collections",
|
||||
"crossbeam-channel",
|
||||
"csv",
|
||||
"derive_builder 0.20.0",
|
||||
@ -2631,7 +2646,6 @@ dependencies = [
|
||||
"meilisearch-types",
|
||||
"memmap2",
|
||||
"page_size",
|
||||
"raw-collections",
|
||||
"rayon",
|
||||
"roaring",
|
||||
"serde",
|
||||
@ -3549,6 +3563,7 @@ dependencies = [
|
||||
"actix-web",
|
||||
"anyhow",
|
||||
"bumpalo",
|
||||
"bumparaw-collections",
|
||||
"convert_case 0.6.0",
|
||||
"csv",
|
||||
"deserr",
|
||||
@ -3561,7 +3576,6 @@ dependencies = [
|
||||
"meili-snap",
|
||||
"memmap2",
|
||||
"milli",
|
||||
"raw-collections",
|
||||
"roaring",
|
||||
"serde",
|
||||
"serde-cs",
|
||||
@ -3618,6 +3632,7 @@ dependencies = [
|
||||
"bincode",
|
||||
"bstr",
|
||||
"bumpalo",
|
||||
"bumparaw-collections",
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
"candle-core",
|
||||
@ -3656,7 +3671,6 @@ dependencies = [
|
||||
"once_cell",
|
||||
"ordered-float",
|
||||
"rand",
|
||||
"raw-collections",
|
||||
"rayon",
|
||||
"rayon-par-bridge",
|
||||
"rhai",
|
||||
@ -4487,19 +4501,6 @@ dependencies = [
|
||||
"rand",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "raw-collections"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/meilisearch/raw-collections.git#15e5d7bdebc0c149b2a28b2454f307c717d07f8a"
|
||||
dependencies = [
|
||||
"allocator-api2",
|
||||
"bitpacking",
|
||||
"bumpalo",
|
||||
"hashbrown 0.15.1",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "raw-cpuid"
|
||||
version = "10.7.0"
|
||||
|
@ -13,6 +13,8 @@ license.workspace = true
|
||||
[dependencies]
|
||||
anyhow = "1.0.86"
|
||||
bincode = "1.3.3"
|
||||
bumpalo = "3.16.0"
|
||||
bumparaw-collections = "0.1.2"
|
||||
csv = "1.3.0"
|
||||
derive_builder = "0.20.0"
|
||||
dump = { path = "../dump" }
|
||||
@ -21,8 +23,8 @@ file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.30"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.4"
|
||||
page_size = "0.6.0"
|
||||
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
|
||||
rayon = "1.10.0"
|
||||
roaring = { version = "0.10.7", features = ["serde"] }
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
@ -30,7 +32,6 @@ serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
synchronoise = "1.0.1"
|
||||
tempfile = "3.10.1"
|
||||
thiserror = "1.0.61"
|
||||
memmap2 = "0.9.4"
|
||||
time = { version = "0.3.36", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
@ -40,7 +41,6 @@ time = { version = "0.3.36", features = [
|
||||
tracing = "0.1.40"
|
||||
ureq = "2.10.0"
|
||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
bumpalo = "3.16.0"
|
||||
|
||||
[dev-dependencies]
|
||||
arroy = "0.5.0"
|
||||
|
@ -24,7 +24,7 @@ flate2 = "1.0.30"
|
||||
fst = "0.4.7"
|
||||
memmap2 = "0.9.4"
|
||||
milli = { path = "../milli" }
|
||||
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
|
||||
bumparaw-collections = "0.1.2"
|
||||
roaring = { version = "0.10.7", features = ["serde"] }
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
serde-cs = "0.2.4"
|
||||
|
@ -4,10 +4,10 @@ use std::io::{self, BufWriter};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use bumpalo::Bump;
|
||||
use bumparaw_collections::RawMap;
|
||||
use memmap2::Mmap;
|
||||
use milli::documents::Error;
|
||||
use milli::Object;
|
||||
use raw_collections::RawMap;
|
||||
use serde::de::{SeqAccess, Visitor};
|
||||
use serde::{Deserialize, Deserializer};
|
||||
use serde_json::error::Category;
|
||||
|
@ -91,8 +91,8 @@ ureq = { version = "2.10.0", features = ["json"] }
|
||||
url = "2.5.2"
|
||||
rayon-par-bridge = "0.1.0"
|
||||
hashbrown = "0.15.0"
|
||||
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
|
||||
bumpalo = "3.16.0"
|
||||
bumparaw-collections = "0.1.2"
|
||||
thread_local = "1.1.8"
|
||||
allocator-api2 = "0.2.18"
|
||||
rustc-hash = "2.0.0"
|
||||
|
@ -3,12 +3,13 @@ use std::collections::BTreeMap;
|
||||
use std::fmt::{self, Debug};
|
||||
|
||||
use bumpalo::Bump;
|
||||
use bumparaw_collections::{RawMap, RawVec, Value};
|
||||
use liquid::model::{
|
||||
ArrayView, DisplayCow, KString, KStringCow, ObjectRender, ObjectSource, ScalarCow, State,
|
||||
Value as LiquidValue,
|
||||
};
|
||||
use liquid::{ObjectView, ValueView};
|
||||
use raw_collections::{RawMap, RawVec};
|
||||
use rustc_hash::FxBuildHasher;
|
||||
use serde_json::value::RawValue;
|
||||
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
|
||||
@ -195,7 +196,7 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ObjectView for ParseableDocument<'doc
|
||||
}
|
||||
|
||||
impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc, D> {
|
||||
fn as_debug(&self) -> &dyn fmt::Debug {
|
||||
fn as_debug(&self) -> &dyn Debug {
|
||||
self
|
||||
}
|
||||
fn render(&self) -> liquid::model::DisplayCow<'_> {
|
||||
@ -243,14 +244,13 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct ParseableValue<'doc> {
|
||||
value: raw_collections::Value<'doc>,
|
||||
value: Value<'doc, FxBuildHasher>,
|
||||
}
|
||||
|
||||
impl<'doc> ParseableValue<'doc> {
|
||||
pub fn new(value: &'doc RawValue, doc_alloc: &'doc Bump) -> Self {
|
||||
let value = raw_collections::Value::from_raw_value(value, doc_alloc).unwrap();
|
||||
let value = Value::from_raw_value_and_hasher(value, FxBuildHasher, doc_alloc).unwrap();
|
||||
Self { value }
|
||||
}
|
||||
|
||||
@ -260,19 +260,19 @@ impl<'doc> ParseableValue<'doc> {
|
||||
}
|
||||
|
||||
// transparent newtype for implementing ValueView
|
||||
#[repr(transparent)]
|
||||
#[derive(Debug)]
|
||||
struct ParseableMap<'doc>(RawMap<'doc>);
|
||||
#[repr(transparent)]
|
||||
struct ParseableMap<'doc>(RawMap<'doc, FxBuildHasher>);
|
||||
|
||||
// transparent newtype for implementing ValueView
|
||||
#[repr(transparent)]
|
||||
#[derive(Debug)]
|
||||
#[repr(transparent)]
|
||||
struct ParseableArray<'doc>(RawVec<'doc>);
|
||||
|
||||
impl<'doc> ParseableMap<'doc> {
|
||||
pub fn as_parseable<'a>(map: &'a RawMap<'doc>) -> &'a ParseableMap<'doc> {
|
||||
pub fn as_parseable<'a>(map: &'a RawMap<'doc, FxBuildHasher>) -> &'a ParseableMap<'doc> {
|
||||
// SAFETY: repr(transparent)
|
||||
unsafe { &*(map as *const RawMap as *const Self) }
|
||||
unsafe { &*(map as *const RawMap<FxBuildHasher> as *const Self) }
|
||||
}
|
||||
}
|
||||
|
||||
@ -447,8 +447,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
||||
}
|
||||
|
||||
fn render(&self) -> DisplayCow<'_> {
|
||||
use raw_collections::value::Number;
|
||||
use raw_collections::Value;
|
||||
use bumparaw_collections::value::Number;
|
||||
use bumparaw_collections::Value;
|
||||
|
||||
match &self.value {
|
||||
Value::Null => LiquidValue::Nil.render(),
|
||||
Value::Bool(v) => v.render(),
|
||||
@ -464,8 +465,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
||||
}
|
||||
|
||||
fn source(&self) -> DisplayCow<'_> {
|
||||
use raw_collections::value::Number;
|
||||
use raw_collections::Value;
|
||||
use bumparaw_collections::value::Number;
|
||||
use bumparaw_collections::Value;
|
||||
|
||||
match &self.value {
|
||||
Value::Null => LiquidValue::Nil.source(),
|
||||
Value::Bool(v) => ValueView::source(v),
|
||||
@ -481,8 +483,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
||||
}
|
||||
|
||||
fn type_name(&self) -> &'static str {
|
||||
use raw_collections::value::Number;
|
||||
use raw_collections::Value;
|
||||
use bumparaw_collections::value::Number;
|
||||
use bumparaw_collections::Value;
|
||||
|
||||
match &self.value {
|
||||
Value::Null => LiquidValue::Nil.type_name(),
|
||||
Value::Bool(v) => v.type_name(),
|
||||
@ -498,7 +501,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
||||
}
|
||||
|
||||
fn query_state(&self, state: State) -> bool {
|
||||
use raw_collections::Value;
|
||||
use bumparaw_collections::Value;
|
||||
|
||||
match &self.value {
|
||||
Value::Null => ValueView::query_state(&LiquidValue::Nil, state),
|
||||
Value::Bool(v) => ValueView::query_state(v, state),
|
||||
@ -515,7 +519,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
||||
}
|
||||
|
||||
fn to_kstr(&self) -> KStringCow<'_> {
|
||||
use raw_collections::Value;
|
||||
use bumparaw_collections::Value;
|
||||
|
||||
match &self.value {
|
||||
Value::Null => ValueView::to_kstr(&LiquidValue::Nil),
|
||||
Value::Bool(v) => ValueView::to_kstr(v),
|
||||
@ -527,12 +532,14 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
||||
}
|
||||
|
||||
fn to_value(&self) -> LiquidValue {
|
||||
use raw_collections::Value;
|
||||
use bumparaw_collections::value::Number;
|
||||
use bumparaw_collections::Value;
|
||||
|
||||
match &self.value {
|
||||
Value::Null => LiquidValue::Nil,
|
||||
Value::Bool(v) => LiquidValue::Scalar(liquid::model::ScalarCow::new(*v)),
|
||||
Value::Number(number) => match number {
|
||||
raw_collections::value::Number::PosInt(number) => {
|
||||
Number::PosInt(number) => {
|
||||
let number: i64 = match (*number).try_into() {
|
||||
Ok(number) => number,
|
||||
Err(_) => {
|
||||
@ -541,12 +548,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
||||
};
|
||||
LiquidValue::Scalar(ScalarCow::new(number))
|
||||
}
|
||||
raw_collections::value::Number::NegInt(number) => {
|
||||
LiquidValue::Scalar(ScalarCow::new(*number))
|
||||
}
|
||||
raw_collections::value::Number::Finite(number) => {
|
||||
LiquidValue::Scalar(ScalarCow::new(*number))
|
||||
}
|
||||
Number::NegInt(number) => LiquidValue::Scalar(ScalarCow::new(*number)),
|
||||
Number::Finite(number) => LiquidValue::Scalar(ScalarCow::new(*number)),
|
||||
},
|
||||
Value::String(s) => LiquidValue::Scalar(liquid::model::ScalarCow::new(s.to_string())),
|
||||
Value::Array(raw_vec) => ParseableArray::as_parseable(raw_vec).to_value(),
|
||||
@ -555,8 +558,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
||||
}
|
||||
|
||||
fn as_scalar(&self) -> Option<liquid::model::ScalarCow<'_>> {
|
||||
use raw_collections::value::Number;
|
||||
use raw_collections::Value;
|
||||
use bumparaw_collections::value::Number;
|
||||
use bumparaw_collections::Value;
|
||||
|
||||
match &self.value {
|
||||
Value::Bool(v) => Some(liquid::model::ScalarCow::new(*v)),
|
||||
Value::Number(number) => match number {
|
||||
@ -576,34 +580,41 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
||||
}
|
||||
|
||||
fn is_scalar(&self) -> bool {
|
||||
use raw_collections::Value;
|
||||
use bumparaw_collections::Value;
|
||||
|
||||
matches!(&self.value, Value::Bool(_) | Value::Number(_) | Value::String(_))
|
||||
}
|
||||
|
||||
fn as_array(&self) -> Option<&dyn liquid::model::ArrayView> {
|
||||
if let raw_collections::Value::Array(array) = &self.value {
|
||||
if let Value::Array(array) = &self.value {
|
||||
return Some(ParseableArray::as_parseable(array) as _);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn is_array(&self) -> bool {
|
||||
matches!(&self.value, raw_collections::Value::Array(_))
|
||||
matches!(&self.value, bumparaw_collections::Value::Array(_))
|
||||
}
|
||||
|
||||
fn as_object(&self) -> Option<&dyn ObjectView> {
|
||||
if let raw_collections::Value::Object(object) = &self.value {
|
||||
if let Value::Object(object) = &self.value {
|
||||
return Some(ParseableMap::as_parseable(object) as _);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn is_object(&self) -> bool {
|
||||
matches!(&self.value, raw_collections::Value::Object(_))
|
||||
matches!(&self.value, bumparaw_collections::Value::Object(_))
|
||||
}
|
||||
|
||||
fn is_nil(&self) -> bool {
|
||||
matches!(&self.value, raw_collections::Value::Null)
|
||||
matches!(&self.value, bumparaw_collections::Value::Null)
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for ParseableValue<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("ParseableValue").field("value", &self.value).finish()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,8 @@
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
|
||||
use bumparaw_collections::RawMap;
|
||||
use heed::RoTxn;
|
||||
use raw_collections::RawMap;
|
||||
use rustc_hash::FxBuildHasher;
|
||||
use serde_json::value::RawValue;
|
||||
|
||||
use super::vector_document::VectorDocument;
|
||||
@ -385,12 +386,12 @@ pub type Entry<'doc> = (&'doc str, &'doc RawValue);
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Versions<'doc> {
|
||||
data: RawMap<'doc>,
|
||||
data: RawMap<'doc, FxBuildHasher>,
|
||||
}
|
||||
|
||||
impl<'doc> Versions<'doc> {
|
||||
pub fn multiple(
|
||||
mut versions: impl Iterator<Item = Result<RawMap<'doc>>>,
|
||||
mut versions: impl Iterator<Item = Result<RawMap<'doc, FxBuildHasher>>>,
|
||||
) -> Result<Option<Self>> {
|
||||
let Some(data) = versions.next() else { return Ok(None) };
|
||||
let mut data = data?;
|
||||
@ -403,7 +404,7 @@ impl<'doc> Versions<'doc> {
|
||||
Ok(Some(Self::single(data)))
|
||||
}
|
||||
|
||||
pub fn single(version: RawMap<'doc>) -> Self {
|
||||
pub fn single(version: RawMap<'doc, FxBuildHasher>) -> Self {
|
||||
Self { data: version }
|
||||
}
|
||||
|
||||
|
@ -69,12 +69,12 @@ use std::io::BufReader;
|
||||
use std::{io, iter, mem};
|
||||
|
||||
use bumpalo::Bump;
|
||||
use bumparaw_collections::bbbul::{BitPacker, BitPacker4x};
|
||||
use bumparaw_collections::map::FrozenMap;
|
||||
use bumparaw_collections::{Bbbul, FrozenBbbul};
|
||||
use grenad::ReaderCursor;
|
||||
use hashbrown::hash_map::RawEntryMut;
|
||||
use hashbrown::HashMap;
|
||||
use raw_collections::bbbul::{BitPacker, BitPacker4x};
|
||||
use raw_collections::map::FrozenMap;
|
||||
use raw_collections::{Bbbul, FrozenBbbul};
|
||||
use roaring::RoaringBitmap;
|
||||
use rustc_hash::FxBuildHasher;
|
||||
|
||||
|
@ -176,9 +176,10 @@ pub fn tokenizer_builder<'a>(
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use bumpalo::Bump;
|
||||
use bumparaw_collections::RawMap;
|
||||
use charabia::TokenizerBuilder;
|
||||
use meili_snap::snapshot;
|
||||
use raw_collections::RawMap;
|
||||
use rustc_hash::FxBuildHasher;
|
||||
use serde_json::json;
|
||||
use serde_json::value::RawValue;
|
||||
|
||||
@ -234,7 +235,7 @@ mod test {
|
||||
|
||||
let bump = Bump::new();
|
||||
let document: &RawValue = serde_json::from_str(&document).unwrap();
|
||||
let document = RawMap::from_raw_value(document, &bump).unwrap();
|
||||
let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, &bump).unwrap();
|
||||
|
||||
let document = Versions::single(document);
|
||||
let document = DocumentFromVersions::new(&document);
|
||||
|
@ -1,6 +1,8 @@
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use bumpalo::Bump;
|
||||
use bumparaw_collections::RawVec;
|
||||
use rustc_hash::FxBuildHasher;
|
||||
use serde::de::{DeserializeSeed, Deserializer as _, Visitor};
|
||||
use serde_json::value::RawValue;
|
||||
|
||||
@ -360,7 +362,7 @@ impl<'a> DeserrRawValue<'a> {
|
||||
}
|
||||
|
||||
pub struct DeserrRawVec<'a> {
|
||||
vec: raw_collections::RawVec<'a>,
|
||||
vec: RawVec<'a>,
|
||||
alloc: &'a Bump,
|
||||
}
|
||||
|
||||
@ -379,7 +381,7 @@ impl<'a> deserr::Sequence for DeserrRawVec<'a> {
|
||||
}
|
||||
|
||||
pub struct DeserrRawVecIter<'a> {
|
||||
it: raw_collections::vec::iter::IntoIter<'a>,
|
||||
it: bumparaw_collections::vec::iter::IntoIter<'a>,
|
||||
alloc: &'a Bump,
|
||||
}
|
||||
|
||||
@ -393,7 +395,7 @@ impl<'a> Iterator for DeserrRawVecIter<'a> {
|
||||
}
|
||||
|
||||
pub struct DeserrRawMap<'a> {
|
||||
map: raw_collections::RawMap<'a>,
|
||||
map: bumparaw_collections::RawMap<'a, FxBuildHasher>,
|
||||
alloc: &'a Bump,
|
||||
}
|
||||
|
||||
@ -416,7 +418,7 @@ impl<'a> deserr::Map for DeserrRawMap<'a> {
|
||||
}
|
||||
|
||||
pub struct DeserrRawMapIter<'a> {
|
||||
it: raw_collections::map::iter::IntoIter<'a>,
|
||||
it: bumparaw_collections::map::iter::IntoIter<'a>,
|
||||
alloc: &'a Bump,
|
||||
}
|
||||
|
||||
@ -615,7 +617,7 @@ impl<'de> Visitor<'de> for DeserrRawValueVisitor<'de> {
|
||||
where
|
||||
A: serde::de::SeqAccess<'de>,
|
||||
{
|
||||
let mut raw_vec = raw_collections::RawVec::new_in(self.alloc);
|
||||
let mut raw_vec = RawVec::new_in(self.alloc);
|
||||
while let Some(next) = seq.next_element()? {
|
||||
raw_vec.push(next);
|
||||
}
|
||||
|
@ -1,10 +1,11 @@
|
||||
use bumpalo::collections::CollectIn;
|
||||
use bumpalo::Bump;
|
||||
use bumparaw_collections::RawMap;
|
||||
use hashbrown::hash_map::Entry;
|
||||
use heed::RoTxn;
|
||||
use memmap2::Mmap;
|
||||
use raw_collections::RawMap;
|
||||
use rayon::slice::ParallelSlice;
|
||||
use rustc_hash::FxBuildHasher;
|
||||
use serde_json::value::RawValue;
|
||||
use serde_json::Deserializer;
|
||||
|
||||
@ -166,8 +167,9 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
|
||||
|
||||
// Only guess the primary key if it is the first document
|
||||
let retrieved_primary_key = if previous_offset == 0 {
|
||||
let doc =
|
||||
RawMap::from_raw_value(doc, indexer).map(Some).map_err(UserError::SerdeJson)?;
|
||||
let doc = RawMap::from_raw_value_and_hasher(doc, FxBuildHasher, indexer)
|
||||
.map(Some)
|
||||
.map_err(UserError::SerdeJson)?;
|
||||
|
||||
let result = retrieve_or_guess_primary_key(
|
||||
rtxn,
|
||||
@ -545,7 +547,8 @@ impl MergeChanges for MergeDocumentForReplacement {
|
||||
match operations.last() {
|
||||
Some(InnerDocOp::Addition(DocumentOffset { content })) => {
|
||||
let document = serde_json::from_slice(content).unwrap();
|
||||
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
||||
let document =
|
||||
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
|
||||
.map_err(UserError::SerdeJson)?;
|
||||
|
||||
if is_new {
|
||||
@ -632,7 +635,8 @@ impl MergeChanges for MergeDocumentForUpdates {
|
||||
}
|
||||
};
|
||||
let document = serde_json::from_slice(content).unwrap();
|
||||
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
||||
let document =
|
||||
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
|
||||
.map_err(UserError::SerdeJson)?;
|
||||
|
||||
Some(Versions::single(document))
|
||||
@ -647,7 +651,8 @@ impl MergeChanges for MergeDocumentForUpdates {
|
||||
};
|
||||
|
||||
let document = serde_json::from_slice(content).unwrap();
|
||||
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
||||
let document =
|
||||
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
|
||||
.map_err(UserError::SerdeJson)?;
|
||||
Ok(document)
|
||||
});
|
||||
|
@ -4,6 +4,7 @@ use std::sync::{OnceLock, RwLock};
|
||||
use std::thread::{self, Builder};
|
||||
|
||||
use big_s::S;
|
||||
use bumparaw_collections::RawMap;
|
||||
use document_changes::{extract, DocumentChanges, IndexingContext, Progress};
|
||||
pub use document_deletion::DocumentDeletion;
|
||||
pub use document_operation::{DocumentOperation, PayloadStats};
|
||||
@ -13,7 +14,7 @@ use heed::{RoTxn, RwTxn};
|
||||
use itertools::{merge_join_by, EitherOrBoth};
|
||||
pub use partial_dump::PartialDump;
|
||||
use rand::SeedableRng as _;
|
||||
use raw_collections::RawMap;
|
||||
use rustc_hash::FxBuildHasher;
|
||||
use time::OffsetDateTime;
|
||||
pub use update_by_function::UpdateByFunction;
|
||||
|
||||
@ -776,7 +777,7 @@ pub fn retrieve_or_guess_primary_key<'a>(
|
||||
index: &Index,
|
||||
new_fields_ids_map: &mut FieldsIdsMap,
|
||||
primary_key_from_op: Option<&'a str>,
|
||||
first_document: Option<RawMap<'a>>,
|
||||
first_document: Option<RawMap<'a, FxBuildHasher>>,
|
||||
) -> Result<StdResult<(PrimaryKey<'a>, bool), UserError>> {
|
||||
// make sure that we have a declared primary key, either fetching it from the index or attempting to guess it.
|
||||
|
||||
|
@ -1,6 +1,8 @@
|
||||
use std::ops::DerefMut;
|
||||
|
||||
use bumparaw_collections::RawMap;
|
||||
use rayon::iter::IndexedParallelIterator;
|
||||
use rustc_hash::FxBuildHasher;
|
||||
use serde_json::value::RawValue;
|
||||
|
||||
use super::document_changes::{DocumentChangeContext, DocumentChanges};
|
||||
@ -75,7 +77,7 @@ where
|
||||
self.primary_key.extract_fields_and_docid(document, fields_ids_map, doc_alloc)?;
|
||||
let external_document_id = external_document_id.to_de();
|
||||
|
||||
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
||||
let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
|
||||
.map_err(InternalError::SerdeJson)?;
|
||||
|
||||
let insertion = Insertion::create(docid, external_document_id, Versions::single(document));
|
||||
|
@ -1,8 +1,9 @@
|
||||
use raw_collections::RawMap;
|
||||
use bumparaw_collections::RawMap;
|
||||
use rayon::iter::IndexedParallelIterator;
|
||||
use rayon::slice::ParallelSlice as _;
|
||||
use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST};
|
||||
use roaring::RoaringBitmap;
|
||||
use rustc_hash::FxBuildHasher;
|
||||
|
||||
use super::document_changes::DocumentChangeContext;
|
||||
use super::DocumentChanges;
|
||||
@ -160,7 +161,11 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> {
|
||||
if document_id != new_document_id {
|
||||
Err(Error::UserError(UserError::DocumentEditionCannotModifyPrimaryKey))
|
||||
} else {
|
||||
let raw_new_doc = RawMap::from_raw_value(raw_new_doc, doc_alloc)
|
||||
let raw_new_doc = RawMap::from_raw_value_and_hasher(
|
||||
raw_new_doc,
|
||||
FxBuildHasher,
|
||||
doc_alloc,
|
||||
)
|
||||
.map_err(InternalError::SerdeJson)?;
|
||||
|
||||
Ok(Some(DocumentChange::Update(Update::create(
|
||||
|
@ -1,9 +1,10 @@
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use bumpalo::Bump;
|
||||
use bumparaw_collections::RawMap;
|
||||
use deserr::{Deserr, IntoValue};
|
||||
use heed::RoTxn;
|
||||
use raw_collections::RawMap;
|
||||
use rustc_hash::FxBuildHasher;
|
||||
use serde::Serialize;
|
||||
use serde_json::value::RawValue;
|
||||
|
||||
@ -84,7 +85,7 @@ pub struct VectorDocumentFromDb<'t> {
|
||||
docid: DocumentId,
|
||||
embedding_config: Vec<IndexEmbeddingConfig>,
|
||||
index: &'t Index,
|
||||
vectors_field: Option<RawMap<'t>>,
|
||||
vectors_field: Option<RawMap<'t, FxBuildHasher>>,
|
||||
rtxn: &'t RoTxn<'t>,
|
||||
doc_alloc: &'t Bump,
|
||||
}
|
||||
@ -102,9 +103,10 @@ impl<'t> VectorDocumentFromDb<'t> {
|
||||
};
|
||||
let vectors = document.vectors_field()?;
|
||||
let vectors_field = match vectors {
|
||||
Some(vectors) => {
|
||||
Some(RawMap::from_raw_value(vectors, doc_alloc).map_err(InternalError::SerdeJson)?)
|
||||
}
|
||||
Some(vectors) => Some(
|
||||
RawMap::from_raw_value_and_hasher(vectors, FxBuildHasher, doc_alloc)
|
||||
.map_err(InternalError::SerdeJson)?,
|
||||
),
|
||||
None => None,
|
||||
};
|
||||
|
||||
@ -220,7 +222,7 @@ fn entry_from_raw_value(
|
||||
|
||||
pub struct VectorDocumentFromVersions<'doc> {
|
||||
external_document_id: &'doc str,
|
||||
vectors: RawMap<'doc>,
|
||||
vectors: RawMap<'doc, FxBuildHasher>,
|
||||
embedders: &'doc EmbeddingConfigs,
|
||||
}
|
||||
|
||||
@ -233,8 +235,8 @@ impl<'doc> VectorDocumentFromVersions<'doc> {
|
||||
) -> Result<Option<Self>> {
|
||||
let document = DocumentFromVersions::new(versions);
|
||||
if let Some(vectors_field) = document.vectors_field()? {
|
||||
let vectors =
|
||||
RawMap::from_raw_value(vectors_field, bump).map_err(UserError::SerdeJson)?;
|
||||
let vectors = RawMap::from_raw_value_and_hasher(vectors_field, FxBuildHasher, bump)
|
||||
.map_err(UserError::SerdeJson)?;
|
||||
Ok(Some(Self { external_document_id, vectors, embedders }))
|
||||
} else {
|
||||
Ok(None)
|
||||
|
Loading…
x
Reference in New Issue
Block a user