mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-10 13:34:30 +01:00
Merge #5145
5145: Use bumparaw-collections in Meilisearch/milli r=dureuill a=Kerollmops This PR is related to #5078. It uses the now published bumparaw-collections and (soon) makes the `RawMap` hasher nonrandom. Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
commit
e974be9518
33
Cargo.lock
generated
33
Cargo.lock
generated
@ -706,6 +706,20 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bumparaw-collections"
|
||||||
|
version = "0.1.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4ce682bdc86c2e25ef5cd95881d9d6a1902214eddf74cf9ffea88fe1464377e8"
|
||||||
|
dependencies = [
|
||||||
|
"allocator-api2",
|
||||||
|
"bitpacking",
|
||||||
|
"bumpalo",
|
||||||
|
"hashbrown 0.15.1",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "byte-unit"
|
name = "byte-unit"
|
||||||
version = "5.1.4"
|
version = "5.1.4"
|
||||||
@ -2617,6 +2631,7 @@ dependencies = [
|
|||||||
"big_s",
|
"big_s",
|
||||||
"bincode",
|
"bincode",
|
||||||
"bumpalo",
|
"bumpalo",
|
||||||
|
"bumparaw-collections",
|
||||||
"crossbeam-channel",
|
"crossbeam-channel",
|
||||||
"csv",
|
"csv",
|
||||||
"derive_builder 0.20.0",
|
"derive_builder 0.20.0",
|
||||||
@ -2631,7 +2646,6 @@ dependencies = [
|
|||||||
"meilisearch-types",
|
"meilisearch-types",
|
||||||
"memmap2",
|
"memmap2",
|
||||||
"page_size",
|
"page_size",
|
||||||
"raw-collections",
|
|
||||||
"rayon",
|
"rayon",
|
||||||
"roaring",
|
"roaring",
|
||||||
"serde",
|
"serde",
|
||||||
@ -3549,6 +3563,7 @@ dependencies = [
|
|||||||
"actix-web",
|
"actix-web",
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"bumpalo",
|
"bumpalo",
|
||||||
|
"bumparaw-collections",
|
||||||
"convert_case 0.6.0",
|
"convert_case 0.6.0",
|
||||||
"csv",
|
"csv",
|
||||||
"deserr",
|
"deserr",
|
||||||
@ -3561,7 +3576,6 @@ dependencies = [
|
|||||||
"meili-snap",
|
"meili-snap",
|
||||||
"memmap2",
|
"memmap2",
|
||||||
"milli",
|
"milli",
|
||||||
"raw-collections",
|
|
||||||
"roaring",
|
"roaring",
|
||||||
"serde",
|
"serde",
|
||||||
"serde-cs",
|
"serde-cs",
|
||||||
@ -3618,6 +3632,7 @@ dependencies = [
|
|||||||
"bincode",
|
"bincode",
|
||||||
"bstr",
|
"bstr",
|
||||||
"bumpalo",
|
"bumpalo",
|
||||||
|
"bumparaw-collections",
|
||||||
"bytemuck",
|
"bytemuck",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"candle-core",
|
"candle-core",
|
||||||
@ -3656,7 +3671,6 @@ dependencies = [
|
|||||||
"once_cell",
|
"once_cell",
|
||||||
"ordered-float",
|
"ordered-float",
|
||||||
"rand",
|
"rand",
|
||||||
"raw-collections",
|
|
||||||
"rayon",
|
"rayon",
|
||||||
"rayon-par-bridge",
|
"rayon-par-bridge",
|
||||||
"rhai",
|
"rhai",
|
||||||
@ -4487,19 +4501,6 @@ dependencies = [
|
|||||||
"rand",
|
"rand",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "raw-collections"
|
|
||||||
version = "0.1.0"
|
|
||||||
source = "git+https://github.com/meilisearch/raw-collections.git#15e5d7bdebc0c149b2a28b2454f307c717d07f8a"
|
|
||||||
dependencies = [
|
|
||||||
"allocator-api2",
|
|
||||||
"bitpacking",
|
|
||||||
"bumpalo",
|
|
||||||
"hashbrown 0.15.1",
|
|
||||||
"serde",
|
|
||||||
"serde_json",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "raw-cpuid"
|
name = "raw-cpuid"
|
||||||
version = "10.7.0"
|
version = "10.7.0"
|
||||||
|
@ -13,6 +13,8 @@ license.workspace = true
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0.86"
|
anyhow = "1.0.86"
|
||||||
bincode = "1.3.3"
|
bincode = "1.3.3"
|
||||||
|
bumpalo = "3.16.0"
|
||||||
|
bumparaw-collections = "0.1.2"
|
||||||
csv = "1.3.0"
|
csv = "1.3.0"
|
||||||
derive_builder = "0.20.0"
|
derive_builder = "0.20.0"
|
||||||
dump = { path = "../dump" }
|
dump = { path = "../dump" }
|
||||||
@ -21,8 +23,8 @@ file-store = { path = "../file-store" }
|
|||||||
flate2 = "1.0.30"
|
flate2 = "1.0.30"
|
||||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||||
meilisearch-types = { path = "../meilisearch-types" }
|
meilisearch-types = { path = "../meilisearch-types" }
|
||||||
|
memmap2 = "0.9.4"
|
||||||
page_size = "0.6.0"
|
page_size = "0.6.0"
|
||||||
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
|
|
||||||
rayon = "1.10.0"
|
rayon = "1.10.0"
|
||||||
roaring = { version = "0.10.7", features = ["serde"] }
|
roaring = { version = "0.10.7", features = ["serde"] }
|
||||||
serde = { version = "1.0.204", features = ["derive"] }
|
serde = { version = "1.0.204", features = ["derive"] }
|
||||||
@ -30,7 +32,6 @@ serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
|||||||
synchronoise = "1.0.1"
|
synchronoise = "1.0.1"
|
||||||
tempfile = "3.10.1"
|
tempfile = "3.10.1"
|
||||||
thiserror = "1.0.61"
|
thiserror = "1.0.61"
|
||||||
memmap2 = "0.9.4"
|
|
||||||
time = { version = "0.3.36", features = [
|
time = { version = "0.3.36", features = [
|
||||||
"serde-well-known",
|
"serde-well-known",
|
||||||
"formatting",
|
"formatting",
|
||||||
@ -40,7 +41,6 @@ time = { version = "0.3.36", features = [
|
|||||||
tracing = "0.1.40"
|
tracing = "0.1.40"
|
||||||
ureq = "2.10.0"
|
ureq = "2.10.0"
|
||||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||||
bumpalo = "3.16.0"
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
arroy = "0.5.0"
|
arroy = "0.5.0"
|
||||||
|
@ -24,7 +24,7 @@ flate2 = "1.0.30"
|
|||||||
fst = "0.4.7"
|
fst = "0.4.7"
|
||||||
memmap2 = "0.9.4"
|
memmap2 = "0.9.4"
|
||||||
milli = { path = "../milli" }
|
milli = { path = "../milli" }
|
||||||
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
|
bumparaw-collections = "0.1.2"
|
||||||
roaring = { version = "0.10.7", features = ["serde"] }
|
roaring = { version = "0.10.7", features = ["serde"] }
|
||||||
serde = { version = "1.0.204", features = ["derive"] }
|
serde = { version = "1.0.204", features = ["derive"] }
|
||||||
serde-cs = "0.2.4"
|
serde-cs = "0.2.4"
|
||||||
|
@ -4,10 +4,10 @@ use std::io::{self, BufWriter};
|
|||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
|
use bumparaw_collections::RawMap;
|
||||||
use memmap2::Mmap;
|
use memmap2::Mmap;
|
||||||
use milli::documents::Error;
|
use milli::documents::Error;
|
||||||
use milli::Object;
|
use milli::Object;
|
||||||
use raw_collections::RawMap;
|
|
||||||
use serde::de::{SeqAccess, Visitor};
|
use serde::de::{SeqAccess, Visitor};
|
||||||
use serde::{Deserialize, Deserializer};
|
use serde::{Deserialize, Deserializer};
|
||||||
use serde_json::error::Category;
|
use serde_json::error::Category;
|
||||||
|
@ -91,8 +91,8 @@ ureq = { version = "2.10.0", features = ["json"] }
|
|||||||
url = "2.5.2"
|
url = "2.5.2"
|
||||||
rayon-par-bridge = "0.1.0"
|
rayon-par-bridge = "0.1.0"
|
||||||
hashbrown = "0.15.0"
|
hashbrown = "0.15.0"
|
||||||
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
|
|
||||||
bumpalo = "3.16.0"
|
bumpalo = "3.16.0"
|
||||||
|
bumparaw-collections = "0.1.2"
|
||||||
thread_local = "1.1.8"
|
thread_local = "1.1.8"
|
||||||
allocator-api2 = "0.2.18"
|
allocator-api2 = "0.2.18"
|
||||||
rustc-hash = "2.0.0"
|
rustc-hash = "2.0.0"
|
||||||
|
@ -3,12 +3,13 @@ use std::collections::BTreeMap;
|
|||||||
use std::fmt::{self, Debug};
|
use std::fmt::{self, Debug};
|
||||||
|
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
|
use bumparaw_collections::{RawMap, RawVec, Value};
|
||||||
use liquid::model::{
|
use liquid::model::{
|
||||||
ArrayView, DisplayCow, KString, KStringCow, ObjectRender, ObjectSource, ScalarCow, State,
|
ArrayView, DisplayCow, KString, KStringCow, ObjectRender, ObjectSource, ScalarCow, State,
|
||||||
Value as LiquidValue,
|
Value as LiquidValue,
|
||||||
};
|
};
|
||||||
use liquid::{ObjectView, ValueView};
|
use liquid::{ObjectView, ValueView};
|
||||||
use raw_collections::{RawMap, RawVec};
|
use rustc_hash::FxBuildHasher;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
|
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
|
||||||
@ -195,7 +196,7 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ObjectView for ParseableDocument<'doc
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc, D> {
|
impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc, D> {
|
||||||
fn as_debug(&self) -> &dyn fmt::Debug {
|
fn as_debug(&self) -> &dyn Debug {
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
fn render(&self) -> liquid::model::DisplayCow<'_> {
|
fn render(&self) -> liquid::model::DisplayCow<'_> {
|
||||||
@ -243,14 +244,13 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
struct ParseableValue<'doc> {
|
struct ParseableValue<'doc> {
|
||||||
value: raw_collections::Value<'doc>,
|
value: Value<'doc, FxBuildHasher>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'doc> ParseableValue<'doc> {
|
impl<'doc> ParseableValue<'doc> {
|
||||||
pub fn new(value: &'doc RawValue, doc_alloc: &'doc Bump) -> Self {
|
pub fn new(value: &'doc RawValue, doc_alloc: &'doc Bump) -> Self {
|
||||||
let value = raw_collections::Value::from_raw_value(value, doc_alloc).unwrap();
|
let value = Value::from_raw_value_and_hasher(value, FxBuildHasher, doc_alloc).unwrap();
|
||||||
Self { value }
|
Self { value }
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -260,19 +260,19 @@ impl<'doc> ParseableValue<'doc> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// transparent newtype for implementing ValueView
|
// transparent newtype for implementing ValueView
|
||||||
#[repr(transparent)]
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct ParseableMap<'doc>(RawMap<'doc>);
|
#[repr(transparent)]
|
||||||
|
struct ParseableMap<'doc>(RawMap<'doc, FxBuildHasher>);
|
||||||
|
|
||||||
// transparent newtype for implementing ValueView
|
// transparent newtype for implementing ValueView
|
||||||
#[repr(transparent)]
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
#[repr(transparent)]
|
||||||
struct ParseableArray<'doc>(RawVec<'doc>);
|
struct ParseableArray<'doc>(RawVec<'doc>);
|
||||||
|
|
||||||
impl<'doc> ParseableMap<'doc> {
|
impl<'doc> ParseableMap<'doc> {
|
||||||
pub fn as_parseable<'a>(map: &'a RawMap<'doc>) -> &'a ParseableMap<'doc> {
|
pub fn as_parseable<'a>(map: &'a RawMap<'doc, FxBuildHasher>) -> &'a ParseableMap<'doc> {
|
||||||
// SAFETY: repr(transparent)
|
// SAFETY: repr(transparent)
|
||||||
unsafe { &*(map as *const RawMap as *const Self) }
|
unsafe { &*(map as *const RawMap<FxBuildHasher> as *const Self) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -447,8 +447,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn render(&self) -> DisplayCow<'_> {
|
fn render(&self) -> DisplayCow<'_> {
|
||||||
use raw_collections::value::Number;
|
use bumparaw_collections::value::Number;
|
||||||
use raw_collections::Value;
|
use bumparaw_collections::Value;
|
||||||
|
|
||||||
match &self.value {
|
match &self.value {
|
||||||
Value::Null => LiquidValue::Nil.render(),
|
Value::Null => LiquidValue::Nil.render(),
|
||||||
Value::Bool(v) => v.render(),
|
Value::Bool(v) => v.render(),
|
||||||
@ -464,8 +465,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn source(&self) -> DisplayCow<'_> {
|
fn source(&self) -> DisplayCow<'_> {
|
||||||
use raw_collections::value::Number;
|
use bumparaw_collections::value::Number;
|
||||||
use raw_collections::Value;
|
use bumparaw_collections::Value;
|
||||||
|
|
||||||
match &self.value {
|
match &self.value {
|
||||||
Value::Null => LiquidValue::Nil.source(),
|
Value::Null => LiquidValue::Nil.source(),
|
||||||
Value::Bool(v) => ValueView::source(v),
|
Value::Bool(v) => ValueView::source(v),
|
||||||
@ -481,8 +483,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn type_name(&self) -> &'static str {
|
fn type_name(&self) -> &'static str {
|
||||||
use raw_collections::value::Number;
|
use bumparaw_collections::value::Number;
|
||||||
use raw_collections::Value;
|
use bumparaw_collections::Value;
|
||||||
|
|
||||||
match &self.value {
|
match &self.value {
|
||||||
Value::Null => LiquidValue::Nil.type_name(),
|
Value::Null => LiquidValue::Nil.type_name(),
|
||||||
Value::Bool(v) => v.type_name(),
|
Value::Bool(v) => v.type_name(),
|
||||||
@ -498,7 +501,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn query_state(&self, state: State) -> bool {
|
fn query_state(&self, state: State) -> bool {
|
||||||
use raw_collections::Value;
|
use bumparaw_collections::Value;
|
||||||
|
|
||||||
match &self.value {
|
match &self.value {
|
||||||
Value::Null => ValueView::query_state(&LiquidValue::Nil, state),
|
Value::Null => ValueView::query_state(&LiquidValue::Nil, state),
|
||||||
Value::Bool(v) => ValueView::query_state(v, state),
|
Value::Bool(v) => ValueView::query_state(v, state),
|
||||||
@ -515,7 +519,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn to_kstr(&self) -> KStringCow<'_> {
|
fn to_kstr(&self) -> KStringCow<'_> {
|
||||||
use raw_collections::Value;
|
use bumparaw_collections::Value;
|
||||||
|
|
||||||
match &self.value {
|
match &self.value {
|
||||||
Value::Null => ValueView::to_kstr(&LiquidValue::Nil),
|
Value::Null => ValueView::to_kstr(&LiquidValue::Nil),
|
||||||
Value::Bool(v) => ValueView::to_kstr(v),
|
Value::Bool(v) => ValueView::to_kstr(v),
|
||||||
@ -527,12 +532,14 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn to_value(&self) -> LiquidValue {
|
fn to_value(&self) -> LiquidValue {
|
||||||
use raw_collections::Value;
|
use bumparaw_collections::value::Number;
|
||||||
|
use bumparaw_collections::Value;
|
||||||
|
|
||||||
match &self.value {
|
match &self.value {
|
||||||
Value::Null => LiquidValue::Nil,
|
Value::Null => LiquidValue::Nil,
|
||||||
Value::Bool(v) => LiquidValue::Scalar(liquid::model::ScalarCow::new(*v)),
|
Value::Bool(v) => LiquidValue::Scalar(liquid::model::ScalarCow::new(*v)),
|
||||||
Value::Number(number) => match number {
|
Value::Number(number) => match number {
|
||||||
raw_collections::value::Number::PosInt(number) => {
|
Number::PosInt(number) => {
|
||||||
let number: i64 = match (*number).try_into() {
|
let number: i64 = match (*number).try_into() {
|
||||||
Ok(number) => number,
|
Ok(number) => number,
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
@ -541,12 +548,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
|||||||
};
|
};
|
||||||
LiquidValue::Scalar(ScalarCow::new(number))
|
LiquidValue::Scalar(ScalarCow::new(number))
|
||||||
}
|
}
|
||||||
raw_collections::value::Number::NegInt(number) => {
|
Number::NegInt(number) => LiquidValue::Scalar(ScalarCow::new(*number)),
|
||||||
LiquidValue::Scalar(ScalarCow::new(*number))
|
Number::Finite(number) => LiquidValue::Scalar(ScalarCow::new(*number)),
|
||||||
}
|
|
||||||
raw_collections::value::Number::Finite(number) => {
|
|
||||||
LiquidValue::Scalar(ScalarCow::new(*number))
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
Value::String(s) => LiquidValue::Scalar(liquid::model::ScalarCow::new(s.to_string())),
|
Value::String(s) => LiquidValue::Scalar(liquid::model::ScalarCow::new(s.to_string())),
|
||||||
Value::Array(raw_vec) => ParseableArray::as_parseable(raw_vec).to_value(),
|
Value::Array(raw_vec) => ParseableArray::as_parseable(raw_vec).to_value(),
|
||||||
@ -555,8 +558,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn as_scalar(&self) -> Option<liquid::model::ScalarCow<'_>> {
|
fn as_scalar(&self) -> Option<liquid::model::ScalarCow<'_>> {
|
||||||
use raw_collections::value::Number;
|
use bumparaw_collections::value::Number;
|
||||||
use raw_collections::Value;
|
use bumparaw_collections::Value;
|
||||||
|
|
||||||
match &self.value {
|
match &self.value {
|
||||||
Value::Bool(v) => Some(liquid::model::ScalarCow::new(*v)),
|
Value::Bool(v) => Some(liquid::model::ScalarCow::new(*v)),
|
||||||
Value::Number(number) => match number {
|
Value::Number(number) => match number {
|
||||||
@ -576,34 +580,41 @@ impl<'doc> ValueView for ParseableValue<'doc> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn is_scalar(&self) -> bool {
|
fn is_scalar(&self) -> bool {
|
||||||
use raw_collections::Value;
|
use bumparaw_collections::Value;
|
||||||
|
|
||||||
matches!(&self.value, Value::Bool(_) | Value::Number(_) | Value::String(_))
|
matches!(&self.value, Value::Bool(_) | Value::Number(_) | Value::String(_))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn as_array(&self) -> Option<&dyn liquid::model::ArrayView> {
|
fn as_array(&self) -> Option<&dyn liquid::model::ArrayView> {
|
||||||
if let raw_collections::Value::Array(array) = &self.value {
|
if let Value::Array(array) = &self.value {
|
||||||
return Some(ParseableArray::as_parseable(array) as _);
|
return Some(ParseableArray::as_parseable(array) as _);
|
||||||
}
|
}
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_array(&self) -> bool {
|
fn is_array(&self) -> bool {
|
||||||
matches!(&self.value, raw_collections::Value::Array(_))
|
matches!(&self.value, bumparaw_collections::Value::Array(_))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn as_object(&self) -> Option<&dyn ObjectView> {
|
fn as_object(&self) -> Option<&dyn ObjectView> {
|
||||||
if let raw_collections::Value::Object(object) = &self.value {
|
if let Value::Object(object) = &self.value {
|
||||||
return Some(ParseableMap::as_parseable(object) as _);
|
return Some(ParseableMap::as_parseable(object) as _);
|
||||||
}
|
}
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_object(&self) -> bool {
|
fn is_object(&self) -> bool {
|
||||||
matches!(&self.value, raw_collections::Value::Object(_))
|
matches!(&self.value, bumparaw_collections::Value::Object(_))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_nil(&self) -> bool {
|
fn is_nil(&self) -> bool {
|
||||||
matches!(&self.value, raw_collections::Value::Null)
|
matches!(&self.value, bumparaw_collections::Value::Null)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for ParseableValue<'_> {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
f.debug_struct("ParseableValue").field("value", &self.value).finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
use std::collections::{BTreeMap, BTreeSet};
|
use std::collections::{BTreeMap, BTreeSet};
|
||||||
|
|
||||||
|
use bumparaw_collections::RawMap;
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use raw_collections::RawMap;
|
use rustc_hash::FxBuildHasher;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
use super::vector_document::VectorDocument;
|
use super::vector_document::VectorDocument;
|
||||||
@ -385,12 +386,12 @@ pub type Entry<'doc> = (&'doc str, &'doc RawValue);
|
|||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Versions<'doc> {
|
pub struct Versions<'doc> {
|
||||||
data: RawMap<'doc>,
|
data: RawMap<'doc, FxBuildHasher>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'doc> Versions<'doc> {
|
impl<'doc> Versions<'doc> {
|
||||||
pub fn multiple(
|
pub fn multiple(
|
||||||
mut versions: impl Iterator<Item = Result<RawMap<'doc>>>,
|
mut versions: impl Iterator<Item = Result<RawMap<'doc, FxBuildHasher>>>,
|
||||||
) -> Result<Option<Self>> {
|
) -> Result<Option<Self>> {
|
||||||
let Some(data) = versions.next() else { return Ok(None) };
|
let Some(data) = versions.next() else { return Ok(None) };
|
||||||
let mut data = data?;
|
let mut data = data?;
|
||||||
@ -403,7 +404,7 @@ impl<'doc> Versions<'doc> {
|
|||||||
Ok(Some(Self::single(data)))
|
Ok(Some(Self::single(data)))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn single(version: RawMap<'doc>) -> Self {
|
pub fn single(version: RawMap<'doc, FxBuildHasher>) -> Self {
|
||||||
Self { data: version }
|
Self { data: version }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -69,12 +69,12 @@ use std::io::BufReader;
|
|||||||
use std::{io, iter, mem};
|
use std::{io, iter, mem};
|
||||||
|
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
|
use bumparaw_collections::bbbul::{BitPacker, BitPacker4x};
|
||||||
|
use bumparaw_collections::map::FrozenMap;
|
||||||
|
use bumparaw_collections::{Bbbul, FrozenBbbul};
|
||||||
use grenad::ReaderCursor;
|
use grenad::ReaderCursor;
|
||||||
use hashbrown::hash_map::RawEntryMut;
|
use hashbrown::hash_map::RawEntryMut;
|
||||||
use hashbrown::HashMap;
|
use hashbrown::HashMap;
|
||||||
use raw_collections::bbbul::{BitPacker, BitPacker4x};
|
|
||||||
use raw_collections::map::FrozenMap;
|
|
||||||
use raw_collections::{Bbbul, FrozenBbbul};
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use rustc_hash::FxBuildHasher;
|
use rustc_hash::FxBuildHasher;
|
||||||
|
|
||||||
|
@ -176,9 +176,10 @@ pub fn tokenizer_builder<'a>(
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
|
use bumparaw_collections::RawMap;
|
||||||
use charabia::TokenizerBuilder;
|
use charabia::TokenizerBuilder;
|
||||||
use meili_snap::snapshot;
|
use meili_snap::snapshot;
|
||||||
use raw_collections::RawMap;
|
use rustc_hash::FxBuildHasher;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
@ -234,7 +235,7 @@ mod test {
|
|||||||
|
|
||||||
let bump = Bump::new();
|
let bump = Bump::new();
|
||||||
let document: &RawValue = serde_json::from_str(&document).unwrap();
|
let document: &RawValue = serde_json::from_str(&document).unwrap();
|
||||||
let document = RawMap::from_raw_value(document, &bump).unwrap();
|
let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, &bump).unwrap();
|
||||||
|
|
||||||
let document = Versions::single(document);
|
let document = Versions::single(document);
|
||||||
let document = DocumentFromVersions::new(&document);
|
let document = DocumentFromVersions::new(&document);
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
use std::ops::ControlFlow;
|
use std::ops::ControlFlow;
|
||||||
|
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
|
use bumparaw_collections::RawVec;
|
||||||
|
use rustc_hash::FxBuildHasher;
|
||||||
use serde::de::{DeserializeSeed, Deserializer as _, Visitor};
|
use serde::de::{DeserializeSeed, Deserializer as _, Visitor};
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
@ -360,7 +362,7 @@ impl<'a> DeserrRawValue<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct DeserrRawVec<'a> {
|
pub struct DeserrRawVec<'a> {
|
||||||
vec: raw_collections::RawVec<'a>,
|
vec: RawVec<'a>,
|
||||||
alloc: &'a Bump,
|
alloc: &'a Bump,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -379,7 +381,7 @@ impl<'a> deserr::Sequence for DeserrRawVec<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct DeserrRawVecIter<'a> {
|
pub struct DeserrRawVecIter<'a> {
|
||||||
it: raw_collections::vec::iter::IntoIter<'a>,
|
it: bumparaw_collections::vec::iter::IntoIter<'a>,
|
||||||
alloc: &'a Bump,
|
alloc: &'a Bump,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -393,7 +395,7 @@ impl<'a> Iterator for DeserrRawVecIter<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct DeserrRawMap<'a> {
|
pub struct DeserrRawMap<'a> {
|
||||||
map: raw_collections::RawMap<'a>,
|
map: bumparaw_collections::RawMap<'a, FxBuildHasher>,
|
||||||
alloc: &'a Bump,
|
alloc: &'a Bump,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -416,7 +418,7 @@ impl<'a> deserr::Map for DeserrRawMap<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct DeserrRawMapIter<'a> {
|
pub struct DeserrRawMapIter<'a> {
|
||||||
it: raw_collections::map::iter::IntoIter<'a>,
|
it: bumparaw_collections::map::iter::IntoIter<'a>,
|
||||||
alloc: &'a Bump,
|
alloc: &'a Bump,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -615,7 +617,7 @@ impl<'de> Visitor<'de> for DeserrRawValueVisitor<'de> {
|
|||||||
where
|
where
|
||||||
A: serde::de::SeqAccess<'de>,
|
A: serde::de::SeqAccess<'de>,
|
||||||
{
|
{
|
||||||
let mut raw_vec = raw_collections::RawVec::new_in(self.alloc);
|
let mut raw_vec = RawVec::new_in(self.alloc);
|
||||||
while let Some(next) = seq.next_element()? {
|
while let Some(next) = seq.next_element()? {
|
||||||
raw_vec.push(next);
|
raw_vec.push(next);
|
||||||
}
|
}
|
||||||
|
@ -1,10 +1,11 @@
|
|||||||
use bumpalo::collections::CollectIn;
|
use bumpalo::collections::CollectIn;
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
|
use bumparaw_collections::RawMap;
|
||||||
use hashbrown::hash_map::Entry;
|
use hashbrown::hash_map::Entry;
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use memmap2::Mmap;
|
use memmap2::Mmap;
|
||||||
use raw_collections::RawMap;
|
|
||||||
use rayon::slice::ParallelSlice;
|
use rayon::slice::ParallelSlice;
|
||||||
|
use rustc_hash::FxBuildHasher;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
use serde_json::Deserializer;
|
use serde_json::Deserializer;
|
||||||
|
|
||||||
@ -166,8 +167,9 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
|
|||||||
|
|
||||||
// Only guess the primary key if it is the first document
|
// Only guess the primary key if it is the first document
|
||||||
let retrieved_primary_key = if previous_offset == 0 {
|
let retrieved_primary_key = if previous_offset == 0 {
|
||||||
let doc =
|
let doc = RawMap::from_raw_value_and_hasher(doc, FxBuildHasher, indexer)
|
||||||
RawMap::from_raw_value(doc, indexer).map(Some).map_err(UserError::SerdeJson)?;
|
.map(Some)
|
||||||
|
.map_err(UserError::SerdeJson)?;
|
||||||
|
|
||||||
let result = retrieve_or_guess_primary_key(
|
let result = retrieve_or_guess_primary_key(
|
||||||
rtxn,
|
rtxn,
|
||||||
@ -545,8 +547,9 @@ impl MergeChanges for MergeDocumentForReplacement {
|
|||||||
match operations.last() {
|
match operations.last() {
|
||||||
Some(InnerDocOp::Addition(DocumentOffset { content })) => {
|
Some(InnerDocOp::Addition(DocumentOffset { content })) => {
|
||||||
let document = serde_json::from_slice(content).unwrap();
|
let document = serde_json::from_slice(content).unwrap();
|
||||||
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
let document =
|
||||||
.map_err(UserError::SerdeJson)?;
|
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
|
||||||
|
.map_err(UserError::SerdeJson)?;
|
||||||
|
|
||||||
if is_new {
|
if is_new {
|
||||||
Ok(Some(DocumentChange::Insertion(Insertion::create(
|
Ok(Some(DocumentChange::Insertion(Insertion::create(
|
||||||
@ -632,8 +635,9 @@ impl MergeChanges for MergeDocumentForUpdates {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
let document = serde_json::from_slice(content).unwrap();
|
let document = serde_json::from_slice(content).unwrap();
|
||||||
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
let document =
|
||||||
.map_err(UserError::SerdeJson)?;
|
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
|
||||||
|
.map_err(UserError::SerdeJson)?;
|
||||||
|
|
||||||
Some(Versions::single(document))
|
Some(Versions::single(document))
|
||||||
}
|
}
|
||||||
@ -647,8 +651,9 @@ impl MergeChanges for MergeDocumentForUpdates {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let document = serde_json::from_slice(content).unwrap();
|
let document = serde_json::from_slice(content).unwrap();
|
||||||
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
let document =
|
||||||
.map_err(UserError::SerdeJson)?;
|
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
|
||||||
|
.map_err(UserError::SerdeJson)?;
|
||||||
Ok(document)
|
Ok(document)
|
||||||
});
|
});
|
||||||
Versions::multiple(versions)?
|
Versions::multiple(versions)?
|
||||||
|
@ -4,6 +4,7 @@ use std::sync::{OnceLock, RwLock};
|
|||||||
use std::thread::{self, Builder};
|
use std::thread::{self, Builder};
|
||||||
|
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
|
use bumparaw_collections::RawMap;
|
||||||
use document_changes::{extract, DocumentChanges, IndexingContext, Progress};
|
use document_changes::{extract, DocumentChanges, IndexingContext, Progress};
|
||||||
pub use document_deletion::DocumentDeletion;
|
pub use document_deletion::DocumentDeletion;
|
||||||
pub use document_operation::{DocumentOperation, PayloadStats};
|
pub use document_operation::{DocumentOperation, PayloadStats};
|
||||||
@ -13,7 +14,7 @@ use heed::{RoTxn, RwTxn};
|
|||||||
use itertools::{merge_join_by, EitherOrBoth};
|
use itertools::{merge_join_by, EitherOrBoth};
|
||||||
pub use partial_dump::PartialDump;
|
pub use partial_dump::PartialDump;
|
||||||
use rand::SeedableRng as _;
|
use rand::SeedableRng as _;
|
||||||
use raw_collections::RawMap;
|
use rustc_hash::FxBuildHasher;
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
pub use update_by_function::UpdateByFunction;
|
pub use update_by_function::UpdateByFunction;
|
||||||
|
|
||||||
@ -776,7 +777,7 @@ pub fn retrieve_or_guess_primary_key<'a>(
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
new_fields_ids_map: &mut FieldsIdsMap,
|
new_fields_ids_map: &mut FieldsIdsMap,
|
||||||
primary_key_from_op: Option<&'a str>,
|
primary_key_from_op: Option<&'a str>,
|
||||||
first_document: Option<RawMap<'a>>,
|
first_document: Option<RawMap<'a, FxBuildHasher>>,
|
||||||
) -> Result<StdResult<(PrimaryKey<'a>, bool), UserError>> {
|
) -> Result<StdResult<(PrimaryKey<'a>, bool), UserError>> {
|
||||||
// make sure that we have a declared primary key, either fetching it from the index or attempting to guess it.
|
// make sure that we have a declared primary key, either fetching it from the index or attempting to guess it.
|
||||||
|
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
use std::ops::DerefMut;
|
use std::ops::DerefMut;
|
||||||
|
|
||||||
|
use bumparaw_collections::RawMap;
|
||||||
use rayon::iter::IndexedParallelIterator;
|
use rayon::iter::IndexedParallelIterator;
|
||||||
|
use rustc_hash::FxBuildHasher;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
use super::document_changes::{DocumentChangeContext, DocumentChanges};
|
use super::document_changes::{DocumentChangeContext, DocumentChanges};
|
||||||
@ -75,7 +77,7 @@ where
|
|||||||
self.primary_key.extract_fields_and_docid(document, fields_ids_map, doc_alloc)?;
|
self.primary_key.extract_fields_and_docid(document, fields_ids_map, doc_alloc)?;
|
||||||
let external_document_id = external_document_id.to_de();
|
let external_document_id = external_document_id.to_de();
|
||||||
|
|
||||||
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
|
||||||
.map_err(InternalError::SerdeJson)?;
|
.map_err(InternalError::SerdeJson)?;
|
||||||
|
|
||||||
let insertion = Insertion::create(docid, external_document_id, Versions::single(document));
|
let insertion = Insertion::create(docid, external_document_id, Versions::single(document));
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
use raw_collections::RawMap;
|
use bumparaw_collections::RawMap;
|
||||||
use rayon::iter::IndexedParallelIterator;
|
use rayon::iter::IndexedParallelIterator;
|
||||||
use rayon::slice::ParallelSlice as _;
|
use rayon::slice::ParallelSlice as _;
|
||||||
use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST};
|
use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
use rustc_hash::FxBuildHasher;
|
||||||
|
|
||||||
use super::document_changes::DocumentChangeContext;
|
use super::document_changes::DocumentChangeContext;
|
||||||
use super::DocumentChanges;
|
use super::DocumentChanges;
|
||||||
@ -160,8 +161,12 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> {
|
|||||||
if document_id != new_document_id {
|
if document_id != new_document_id {
|
||||||
Err(Error::UserError(UserError::DocumentEditionCannotModifyPrimaryKey))
|
Err(Error::UserError(UserError::DocumentEditionCannotModifyPrimaryKey))
|
||||||
} else {
|
} else {
|
||||||
let raw_new_doc = RawMap::from_raw_value(raw_new_doc, doc_alloc)
|
let raw_new_doc = RawMap::from_raw_value_and_hasher(
|
||||||
.map_err(InternalError::SerdeJson)?;
|
raw_new_doc,
|
||||||
|
FxBuildHasher,
|
||||||
|
doc_alloc,
|
||||||
|
)
|
||||||
|
.map_err(InternalError::SerdeJson)?;
|
||||||
|
|
||||||
Ok(Some(DocumentChange::Update(Update::create(
|
Ok(Some(DocumentChange::Update(Update::create(
|
||||||
docid,
|
docid,
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
use std::collections::BTreeSet;
|
use std::collections::BTreeSet;
|
||||||
|
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
|
use bumparaw_collections::RawMap;
|
||||||
use deserr::{Deserr, IntoValue};
|
use deserr::{Deserr, IntoValue};
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use raw_collections::RawMap;
|
use rustc_hash::FxBuildHasher;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
@ -84,7 +85,7 @@ pub struct VectorDocumentFromDb<'t> {
|
|||||||
docid: DocumentId,
|
docid: DocumentId,
|
||||||
embedding_config: Vec<IndexEmbeddingConfig>,
|
embedding_config: Vec<IndexEmbeddingConfig>,
|
||||||
index: &'t Index,
|
index: &'t Index,
|
||||||
vectors_field: Option<RawMap<'t>>,
|
vectors_field: Option<RawMap<'t, FxBuildHasher>>,
|
||||||
rtxn: &'t RoTxn<'t>,
|
rtxn: &'t RoTxn<'t>,
|
||||||
doc_alloc: &'t Bump,
|
doc_alloc: &'t Bump,
|
||||||
}
|
}
|
||||||
@ -102,9 +103,10 @@ impl<'t> VectorDocumentFromDb<'t> {
|
|||||||
};
|
};
|
||||||
let vectors = document.vectors_field()?;
|
let vectors = document.vectors_field()?;
|
||||||
let vectors_field = match vectors {
|
let vectors_field = match vectors {
|
||||||
Some(vectors) => {
|
Some(vectors) => Some(
|
||||||
Some(RawMap::from_raw_value(vectors, doc_alloc).map_err(InternalError::SerdeJson)?)
|
RawMap::from_raw_value_and_hasher(vectors, FxBuildHasher, doc_alloc)
|
||||||
}
|
.map_err(InternalError::SerdeJson)?,
|
||||||
|
),
|
||||||
None => None,
|
None => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -220,7 +222,7 @@ fn entry_from_raw_value(
|
|||||||
|
|
||||||
pub struct VectorDocumentFromVersions<'doc> {
|
pub struct VectorDocumentFromVersions<'doc> {
|
||||||
external_document_id: &'doc str,
|
external_document_id: &'doc str,
|
||||||
vectors: RawMap<'doc>,
|
vectors: RawMap<'doc, FxBuildHasher>,
|
||||||
embedders: &'doc EmbeddingConfigs,
|
embedders: &'doc EmbeddingConfigs,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -233,8 +235,8 @@ impl<'doc> VectorDocumentFromVersions<'doc> {
|
|||||||
) -> Result<Option<Self>> {
|
) -> Result<Option<Self>> {
|
||||||
let document = DocumentFromVersions::new(versions);
|
let document = DocumentFromVersions::new(versions);
|
||||||
if let Some(vectors_field) = document.vectors_field()? {
|
if let Some(vectors_field) = document.vectors_field()? {
|
||||||
let vectors =
|
let vectors = RawMap::from_raw_value_and_hasher(vectors_field, FxBuildHasher, bump)
|
||||||
RawMap::from_raw_value(vectors_field, bump).map_err(UserError::SerdeJson)?;
|
.map_err(UserError::SerdeJson)?;
|
||||||
Ok(Some(Self { external_document_id, vectors, embedders }))
|
Ok(Some(Self { external_document_id, vectors, embedders }))
|
||||||
} else {
|
} else {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user