diff --git a/Cargo.lock b/Cargo.lock index 3c2fb711e..9476506ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -706,6 +706,20 @@ dependencies = [ "serde", ] +[[package]] +name = "bumparaw-collections" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ce682bdc86c2e25ef5cd95881d9d6a1902214eddf74cf9ffea88fe1464377e8" +dependencies = [ + "allocator-api2", + "bitpacking", + "bumpalo", + "hashbrown 0.15.1", + "serde", + "serde_json", +] + [[package]] name = "byte-unit" version = "5.1.4" @@ -2617,6 +2631,7 @@ dependencies = [ "big_s", "bincode", "bumpalo", + "bumparaw-collections", "crossbeam-channel", "csv", "derive_builder 0.20.0", @@ -2631,7 +2646,6 @@ dependencies = [ "meilisearch-types", "memmap2", "page_size", - "raw-collections", "rayon", "roaring", "serde", @@ -3549,6 +3563,7 @@ dependencies = [ "actix-web", "anyhow", "bumpalo", + "bumparaw-collections", "convert_case 0.6.0", "csv", "deserr", @@ -3561,7 +3576,6 @@ dependencies = [ "meili-snap", "memmap2", "milli", - "raw-collections", "roaring", "serde", "serde-cs", @@ -3618,6 +3632,7 @@ dependencies = [ "bincode", "bstr", "bumpalo", + "bumparaw-collections", "bytemuck", "byteorder", "candle-core", @@ -3656,7 +3671,6 @@ dependencies = [ "once_cell", "ordered-float", "rand", - "raw-collections", "rayon", "rayon-par-bridge", "rhai", @@ -4487,19 +4501,6 @@ dependencies = [ "rand", ] -[[package]] -name = "raw-collections" -version = "0.1.0" -source = "git+https://github.com/meilisearch/raw-collections.git#15e5d7bdebc0c149b2a28b2454f307c717d07f8a" -dependencies = [ - "allocator-api2", - "bitpacking", - "bumpalo", - "hashbrown 0.15.1", - "serde", - "serde_json", -] - [[package]] name = "raw-cpuid" version = "10.7.0" diff --git a/crates/index-scheduler/Cargo.toml b/crates/index-scheduler/Cargo.toml index ad4c1b4b9..5d7eb1913 100644 --- a/crates/index-scheduler/Cargo.toml +++ b/crates/index-scheduler/Cargo.toml @@ -13,6 +13,8 @@ license.workspace = true [dependencies] anyhow = "1.0.86" bincode = "1.3.3" +bumpalo = "3.16.0" +bumparaw-collections = "0.1.2" csv = "1.3.0" derive_builder = "0.20.0" dump = { path = "../dump" } @@ -21,8 +23,8 @@ file-store = { path = "../file-store" } flate2 = "1.0.30" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } +memmap2 = "0.9.4" page_size = "0.6.0" -raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" } rayon = "1.10.0" roaring = { version = "0.10.7", features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } @@ -30,7 +32,6 @@ serde_json = { version = "1.0.120", features = ["preserve_order"] } synchronoise = "1.0.1" tempfile = "3.10.1" thiserror = "1.0.61" -memmap2 = "0.9.4" time = { version = "0.3.36", features = [ "serde-well-known", "formatting", @@ -40,7 +41,6 @@ time = { version = "0.3.36", features = [ tracing = "0.1.40" ureq = "2.10.0" uuid = { version = "1.10.0", features = ["serde", "v4"] } -bumpalo = "3.16.0" [dev-dependencies] arroy = "0.5.0" diff --git a/crates/meilisearch-types/Cargo.toml b/crates/meilisearch-types/Cargo.toml index aca06a018..e81e6dd35 100644 --- a/crates/meilisearch-types/Cargo.toml +++ b/crates/meilisearch-types/Cargo.toml @@ -24,7 +24,7 @@ flate2 = "1.0.30" fst = "0.4.7" memmap2 = "0.9.4" milli = { path = "../milli" } -raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" } +bumparaw-collections = "0.1.2" roaring = { version = "0.10.7", features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } serde-cs = "0.2.4" diff --git a/crates/meilisearch-types/src/document_formats.rs b/crates/meilisearch-types/src/document_formats.rs index 008be4022..c6e8ad907 100644 --- a/crates/meilisearch-types/src/document_formats.rs +++ b/crates/meilisearch-types/src/document_formats.rs @@ -4,10 +4,10 @@ use std::io::{self, BufWriter}; use std::marker::PhantomData; use bumpalo::Bump; +use bumparaw_collections::RawMap; use memmap2::Mmap; use milli::documents::Error; use milli::Object; -use raw_collections::RawMap; use serde::de::{SeqAccess, Visitor}; use serde::{Deserialize, Deserializer}; use serde_json::error::Category; diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index 2a959b654..9f113e013 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -91,8 +91,8 @@ ureq = { version = "2.10.0", features = ["json"] } url = "2.5.2" rayon-par-bridge = "0.1.0" hashbrown = "0.15.0" -raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" } bumpalo = "3.16.0" +bumparaw-collections = "0.1.2" thread_local = "1.1.8" allocator-api2 = "0.2.18" rustc-hash = "2.0.0" diff --git a/crates/milli/src/prompt/document.rs b/crates/milli/src/prompt/document.rs index dea7946da..ae0a506ac 100644 --- a/crates/milli/src/prompt/document.rs +++ b/crates/milli/src/prompt/document.rs @@ -3,12 +3,13 @@ use std::collections::BTreeMap; use std::fmt::{self, Debug}; use bumpalo::Bump; +use bumparaw_collections::{RawMap, RawVec, Value}; use liquid::model::{ ArrayView, DisplayCow, KString, KStringCow, ObjectRender, ObjectSource, ScalarCow, State, Value as LiquidValue, }; use liquid::{ObjectView, ValueView}; -use raw_collections::{RawMap, RawVec}; +use rustc_hash::FxBuildHasher; use serde_json::value::RawValue; use crate::update::del_add::{DelAdd, KvReaderDelAdd}; @@ -195,7 +196,7 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ObjectView for ParseableDocument<'doc } impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc, D> { - fn as_debug(&self) -> &dyn fmt::Debug { + fn as_debug(&self) -> &dyn Debug { self } fn render(&self) -> liquid::model::DisplayCow<'_> { @@ -243,14 +244,13 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc, } } -#[derive(Debug)] struct ParseableValue<'doc> { - value: raw_collections::Value<'doc>, + value: Value<'doc, FxBuildHasher>, } impl<'doc> ParseableValue<'doc> { pub fn new(value: &'doc RawValue, doc_alloc: &'doc Bump) -> Self { - let value = raw_collections::Value::from_raw_value(value, doc_alloc).unwrap(); + let value = Value::from_raw_value_and_hasher(value, FxBuildHasher, doc_alloc).unwrap(); Self { value } } @@ -260,19 +260,19 @@ impl<'doc> ParseableValue<'doc> { } // transparent newtype for implementing ValueView -#[repr(transparent)] #[derive(Debug)] -struct ParseableMap<'doc>(RawMap<'doc>); +#[repr(transparent)] +struct ParseableMap<'doc>(RawMap<'doc, FxBuildHasher>); // transparent newtype for implementing ValueView -#[repr(transparent)] #[derive(Debug)] +#[repr(transparent)] struct ParseableArray<'doc>(RawVec<'doc>); impl<'doc> ParseableMap<'doc> { - pub fn as_parseable<'a>(map: &'a RawMap<'doc>) -> &'a ParseableMap<'doc> { + pub fn as_parseable<'a>(map: &'a RawMap<'doc, FxBuildHasher>) -> &'a ParseableMap<'doc> { // SAFETY: repr(transparent) - unsafe { &*(map as *const RawMap as *const Self) } + unsafe { &*(map as *const RawMap as *const Self) } } } @@ -447,8 +447,9 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn render(&self) -> DisplayCow<'_> { - use raw_collections::value::Number; - use raw_collections::Value; + use bumparaw_collections::value::Number; + use bumparaw_collections::Value; + match &self.value { Value::Null => LiquidValue::Nil.render(), Value::Bool(v) => v.render(), @@ -464,8 +465,9 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn source(&self) -> DisplayCow<'_> { - use raw_collections::value::Number; - use raw_collections::Value; + use bumparaw_collections::value::Number; + use bumparaw_collections::Value; + match &self.value { Value::Null => LiquidValue::Nil.source(), Value::Bool(v) => ValueView::source(v), @@ -481,8 +483,9 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn type_name(&self) -> &'static str { - use raw_collections::value::Number; - use raw_collections::Value; + use bumparaw_collections::value::Number; + use bumparaw_collections::Value; + match &self.value { Value::Null => LiquidValue::Nil.type_name(), Value::Bool(v) => v.type_name(), @@ -498,7 +501,8 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn query_state(&self, state: State) -> bool { - use raw_collections::Value; + use bumparaw_collections::Value; + match &self.value { Value::Null => ValueView::query_state(&LiquidValue::Nil, state), Value::Bool(v) => ValueView::query_state(v, state), @@ -515,7 +519,8 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn to_kstr(&self) -> KStringCow<'_> { - use raw_collections::Value; + use bumparaw_collections::Value; + match &self.value { Value::Null => ValueView::to_kstr(&LiquidValue::Nil), Value::Bool(v) => ValueView::to_kstr(v), @@ -527,12 +532,14 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn to_value(&self) -> LiquidValue { - use raw_collections::Value; + use bumparaw_collections::value::Number; + use bumparaw_collections::Value; + match &self.value { Value::Null => LiquidValue::Nil, Value::Bool(v) => LiquidValue::Scalar(liquid::model::ScalarCow::new(*v)), Value::Number(number) => match number { - raw_collections::value::Number::PosInt(number) => { + Number::PosInt(number) => { let number: i64 = match (*number).try_into() { Ok(number) => number, Err(_) => { @@ -541,12 +548,8 @@ impl<'doc> ValueView for ParseableValue<'doc> { }; LiquidValue::Scalar(ScalarCow::new(number)) } - raw_collections::value::Number::NegInt(number) => { - LiquidValue::Scalar(ScalarCow::new(*number)) - } - raw_collections::value::Number::Finite(number) => { - LiquidValue::Scalar(ScalarCow::new(*number)) - } + Number::NegInt(number) => LiquidValue::Scalar(ScalarCow::new(*number)), + Number::Finite(number) => LiquidValue::Scalar(ScalarCow::new(*number)), }, Value::String(s) => LiquidValue::Scalar(liquid::model::ScalarCow::new(s.to_string())), Value::Array(raw_vec) => ParseableArray::as_parseable(raw_vec).to_value(), @@ -555,8 +558,9 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn as_scalar(&self) -> Option> { - use raw_collections::value::Number; - use raw_collections::Value; + use bumparaw_collections::value::Number; + use bumparaw_collections::Value; + match &self.value { Value::Bool(v) => Some(liquid::model::ScalarCow::new(*v)), Value::Number(number) => match number { @@ -576,34 +580,41 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn is_scalar(&self) -> bool { - use raw_collections::Value; + use bumparaw_collections::Value; + matches!(&self.value, Value::Bool(_) | Value::Number(_) | Value::String(_)) } fn as_array(&self) -> Option<&dyn liquid::model::ArrayView> { - if let raw_collections::Value::Array(array) = &self.value { + if let Value::Array(array) = &self.value { return Some(ParseableArray::as_parseable(array) as _); } None } fn is_array(&self) -> bool { - matches!(&self.value, raw_collections::Value::Array(_)) + matches!(&self.value, bumparaw_collections::Value::Array(_)) } fn as_object(&self) -> Option<&dyn ObjectView> { - if let raw_collections::Value::Object(object) = &self.value { + if let Value::Object(object) = &self.value { return Some(ParseableMap::as_parseable(object) as _); } None } fn is_object(&self) -> bool { - matches!(&self.value, raw_collections::Value::Object(_)) + matches!(&self.value, bumparaw_collections::Value::Object(_)) } fn is_nil(&self) -> bool { - matches!(&self.value, raw_collections::Value::Null) + matches!(&self.value, bumparaw_collections::Value::Null) + } +} + +impl Debug for ParseableValue<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ParseableValue").field("value", &self.value).finish() } } diff --git a/crates/milli/src/update/new/document.rs b/crates/milli/src/update/new/document.rs index b1a2218f2..930b0c078 100644 --- a/crates/milli/src/update/new/document.rs +++ b/crates/milli/src/update/new/document.rs @@ -1,7 +1,8 @@ use std::collections::{BTreeMap, BTreeSet}; +use bumparaw_collections::RawMap; use heed::RoTxn; -use raw_collections::RawMap; +use rustc_hash::FxBuildHasher; use serde_json::value::RawValue; use super::vector_document::VectorDocument; @@ -385,12 +386,12 @@ pub type Entry<'doc> = (&'doc str, &'doc RawValue); #[derive(Debug)] pub struct Versions<'doc> { - data: RawMap<'doc>, + data: RawMap<'doc, FxBuildHasher>, } impl<'doc> Versions<'doc> { pub fn multiple( - mut versions: impl Iterator>>, + mut versions: impl Iterator>>, ) -> Result> { let Some(data) = versions.next() else { return Ok(None) }; let mut data = data?; @@ -403,7 +404,7 @@ impl<'doc> Versions<'doc> { Ok(Some(Self::single(data))) } - pub fn single(version: RawMap<'doc>) -> Self { + pub fn single(version: RawMap<'doc, FxBuildHasher>) -> Self { Self { data: version } } diff --git a/crates/milli/src/update/new/extract/cache.rs b/crates/milli/src/update/new/extract/cache.rs index 658a3127c..09ca60211 100644 --- a/crates/milli/src/update/new/extract/cache.rs +++ b/crates/milli/src/update/new/extract/cache.rs @@ -69,12 +69,12 @@ use std::io::BufReader; use std::{io, iter, mem}; use bumpalo::Bump; +use bumparaw_collections::bbbul::{BitPacker, BitPacker4x}; +use bumparaw_collections::map::FrozenMap; +use bumparaw_collections::{Bbbul, FrozenBbbul}; use grenad::ReaderCursor; use hashbrown::hash_map::RawEntryMut; use hashbrown::HashMap; -use raw_collections::bbbul::{BitPacker, BitPacker4x}; -use raw_collections::map::FrozenMap; -use raw_collections::{Bbbul, FrozenBbbul}; use roaring::RoaringBitmap; use rustc_hash::FxBuildHasher; diff --git a/crates/milli/src/update/new/extract/searchable/tokenize_document.rs b/crates/milli/src/update/new/extract/searchable/tokenize_document.rs index ffdce5b7e..1c1605b66 100644 --- a/crates/milli/src/update/new/extract/searchable/tokenize_document.rs +++ b/crates/milli/src/update/new/extract/searchable/tokenize_document.rs @@ -176,9 +176,10 @@ pub fn tokenizer_builder<'a>( #[cfg(test)] mod test { use bumpalo::Bump; + use bumparaw_collections::RawMap; use charabia::TokenizerBuilder; use meili_snap::snapshot; - use raw_collections::RawMap; + use rustc_hash::FxBuildHasher; use serde_json::json; use serde_json::value::RawValue; @@ -234,7 +235,7 @@ mod test { let bump = Bump::new(); let document: &RawValue = serde_json::from_str(&document).unwrap(); - let document = RawMap::from_raw_value(document, &bump).unwrap(); + let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, &bump).unwrap(); let document = Versions::single(document); let document = DocumentFromVersions::new(&document); diff --git a/crates/milli/src/update/new/indexer/de.rs b/crates/milli/src/update/new/indexer/de.rs index c9808360e..4d9fa40a1 100644 --- a/crates/milli/src/update/new/indexer/de.rs +++ b/crates/milli/src/update/new/indexer/de.rs @@ -1,6 +1,8 @@ use std::ops::ControlFlow; use bumpalo::Bump; +use bumparaw_collections::RawVec; +use rustc_hash::FxBuildHasher; use serde::de::{DeserializeSeed, Deserializer as _, Visitor}; use serde_json::value::RawValue; @@ -360,7 +362,7 @@ impl<'a> DeserrRawValue<'a> { } pub struct DeserrRawVec<'a> { - vec: raw_collections::RawVec<'a>, + vec: RawVec<'a>, alloc: &'a Bump, } @@ -379,7 +381,7 @@ impl<'a> deserr::Sequence for DeserrRawVec<'a> { } pub struct DeserrRawVecIter<'a> { - it: raw_collections::vec::iter::IntoIter<'a>, + it: bumparaw_collections::vec::iter::IntoIter<'a>, alloc: &'a Bump, } @@ -393,7 +395,7 @@ impl<'a> Iterator for DeserrRawVecIter<'a> { } pub struct DeserrRawMap<'a> { - map: raw_collections::RawMap<'a>, + map: bumparaw_collections::RawMap<'a, FxBuildHasher>, alloc: &'a Bump, } @@ -416,7 +418,7 @@ impl<'a> deserr::Map for DeserrRawMap<'a> { } pub struct DeserrRawMapIter<'a> { - it: raw_collections::map::iter::IntoIter<'a>, + it: bumparaw_collections::map::iter::IntoIter<'a>, alloc: &'a Bump, } @@ -615,7 +617,7 @@ impl<'de> Visitor<'de> for DeserrRawValueVisitor<'de> { where A: serde::de::SeqAccess<'de>, { - let mut raw_vec = raw_collections::RawVec::new_in(self.alloc); + let mut raw_vec = RawVec::new_in(self.alloc); while let Some(next) = seq.next_element()? { raw_vec.push(next); } diff --git a/crates/milli/src/update/new/indexer/document_operation.rs b/crates/milli/src/update/new/indexer/document_operation.rs index 2a381d5d1..0b7ec493e 100644 --- a/crates/milli/src/update/new/indexer/document_operation.rs +++ b/crates/milli/src/update/new/indexer/document_operation.rs @@ -1,10 +1,11 @@ use bumpalo::collections::CollectIn; use bumpalo::Bump; +use bumparaw_collections::RawMap; use hashbrown::hash_map::Entry; use heed::RoTxn; use memmap2::Mmap; -use raw_collections::RawMap; use rayon::slice::ParallelSlice; +use rustc_hash::FxBuildHasher; use serde_json::value::RawValue; use serde_json::Deserializer; @@ -166,8 +167,9 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>( // Only guess the primary key if it is the first document let retrieved_primary_key = if previous_offset == 0 { - let doc = - RawMap::from_raw_value(doc, indexer).map(Some).map_err(UserError::SerdeJson)?; + let doc = RawMap::from_raw_value_and_hasher(doc, FxBuildHasher, indexer) + .map(Some) + .map_err(UserError::SerdeJson)?; let result = retrieve_or_guess_primary_key( rtxn, @@ -545,8 +547,9 @@ impl MergeChanges for MergeDocumentForReplacement { match operations.last() { Some(InnerDocOp::Addition(DocumentOffset { content })) => { let document = serde_json::from_slice(content).unwrap(); - let document = raw_collections::RawMap::from_raw_value(document, doc_alloc) - .map_err(UserError::SerdeJson)?; + let document = + RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) + .map_err(UserError::SerdeJson)?; if is_new { Ok(Some(DocumentChange::Insertion(Insertion::create( @@ -632,8 +635,9 @@ impl MergeChanges for MergeDocumentForUpdates { } }; let document = serde_json::from_slice(content).unwrap(); - let document = raw_collections::RawMap::from_raw_value(document, doc_alloc) - .map_err(UserError::SerdeJson)?; + let document = + RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) + .map_err(UserError::SerdeJson)?; Some(Versions::single(document)) } @@ -647,8 +651,9 @@ impl MergeChanges for MergeDocumentForUpdates { }; let document = serde_json::from_slice(content).unwrap(); - let document = raw_collections::RawMap::from_raw_value(document, doc_alloc) - .map_err(UserError::SerdeJson)?; + let document = + RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) + .map_err(UserError::SerdeJson)?; Ok(document) }); Versions::multiple(versions)? diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 59088bd47..601645385 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -4,6 +4,7 @@ use std::sync::{OnceLock, RwLock}; use std::thread::{self, Builder}; use big_s::S; +use bumparaw_collections::RawMap; use document_changes::{extract, DocumentChanges, IndexingContext, Progress}; pub use document_deletion::DocumentDeletion; pub use document_operation::{DocumentOperation, PayloadStats}; @@ -13,7 +14,7 @@ use heed::{RoTxn, RwTxn}; use itertools::{merge_join_by, EitherOrBoth}; pub use partial_dump::PartialDump; use rand::SeedableRng as _; -use raw_collections::RawMap; +use rustc_hash::FxBuildHasher; use time::OffsetDateTime; pub use update_by_function::UpdateByFunction; @@ -776,7 +777,7 @@ pub fn retrieve_or_guess_primary_key<'a>( index: &Index, new_fields_ids_map: &mut FieldsIdsMap, primary_key_from_op: Option<&'a str>, - first_document: Option>, + first_document: Option>, ) -> Result, bool), UserError>> { // make sure that we have a declared primary key, either fetching it from the index or attempting to guess it. diff --git a/crates/milli/src/update/new/indexer/partial_dump.rs b/crates/milli/src/update/new/indexer/partial_dump.rs index 2cc653813..6e4abd898 100644 --- a/crates/milli/src/update/new/indexer/partial_dump.rs +++ b/crates/milli/src/update/new/indexer/partial_dump.rs @@ -1,6 +1,8 @@ use std::ops::DerefMut; +use bumparaw_collections::RawMap; use rayon::iter::IndexedParallelIterator; +use rustc_hash::FxBuildHasher; use serde_json::value::RawValue; use super::document_changes::{DocumentChangeContext, DocumentChanges}; @@ -75,7 +77,7 @@ where self.primary_key.extract_fields_and_docid(document, fields_ids_map, doc_alloc)?; let external_document_id = external_document_id.to_de(); - let document = raw_collections::RawMap::from_raw_value(document, doc_alloc) + let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) .map_err(InternalError::SerdeJson)?; let insertion = Insertion::create(docid, external_document_id, Versions::single(document)); diff --git a/crates/milli/src/update/new/indexer/update_by_function.rs b/crates/milli/src/update/new/indexer/update_by_function.rs index a8e3e38a8..3001648e6 100644 --- a/crates/milli/src/update/new/indexer/update_by_function.rs +++ b/crates/milli/src/update/new/indexer/update_by_function.rs @@ -1,8 +1,9 @@ -use raw_collections::RawMap; +use bumparaw_collections::RawMap; use rayon::iter::IndexedParallelIterator; use rayon::slice::ParallelSlice as _; use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST}; use roaring::RoaringBitmap; +use rustc_hash::FxBuildHasher; use super::document_changes::DocumentChangeContext; use super::DocumentChanges; @@ -160,8 +161,12 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> { if document_id != new_document_id { Err(Error::UserError(UserError::DocumentEditionCannotModifyPrimaryKey)) } else { - let raw_new_doc = RawMap::from_raw_value(raw_new_doc, doc_alloc) - .map_err(InternalError::SerdeJson)?; + let raw_new_doc = RawMap::from_raw_value_and_hasher( + raw_new_doc, + FxBuildHasher, + doc_alloc, + ) + .map_err(InternalError::SerdeJson)?; Ok(Some(DocumentChange::Update(Update::create( docid, diff --git a/crates/milli/src/update/new/vector_document.rs b/crates/milli/src/update/new/vector_document.rs index 319730db0..8d14a749d 100644 --- a/crates/milli/src/update/new/vector_document.rs +++ b/crates/milli/src/update/new/vector_document.rs @@ -1,9 +1,10 @@ use std::collections::BTreeSet; use bumpalo::Bump; +use bumparaw_collections::RawMap; use deserr::{Deserr, IntoValue}; use heed::RoTxn; -use raw_collections::RawMap; +use rustc_hash::FxBuildHasher; use serde::Serialize; use serde_json::value::RawValue; @@ -84,7 +85,7 @@ pub struct VectorDocumentFromDb<'t> { docid: DocumentId, embedding_config: Vec, index: &'t Index, - vectors_field: Option>, + vectors_field: Option>, rtxn: &'t RoTxn<'t>, doc_alloc: &'t Bump, } @@ -102,9 +103,10 @@ impl<'t> VectorDocumentFromDb<'t> { }; let vectors = document.vectors_field()?; let vectors_field = match vectors { - Some(vectors) => { - Some(RawMap::from_raw_value(vectors, doc_alloc).map_err(InternalError::SerdeJson)?) - } + Some(vectors) => Some( + RawMap::from_raw_value_and_hasher(vectors, FxBuildHasher, doc_alloc) + .map_err(InternalError::SerdeJson)?, + ), None => None, }; @@ -220,7 +222,7 @@ fn entry_from_raw_value( pub struct VectorDocumentFromVersions<'doc> { external_document_id: &'doc str, - vectors: RawMap<'doc>, + vectors: RawMap<'doc, FxBuildHasher>, embedders: &'doc EmbeddingConfigs, } @@ -233,8 +235,8 @@ impl<'doc> VectorDocumentFromVersions<'doc> { ) -> Result> { let document = DocumentFromVersions::new(versions); if let Some(vectors_field) = document.vectors_field()? { - let vectors = - RawMap::from_raw_value(vectors_field, bump).map_err(UserError::SerdeJson)?; + let vectors = RawMap::from_raw_value_and_hasher(vectors_field, FxBuildHasher, bump) + .map_err(UserError::SerdeJson)?; Ok(Some(Self { external_document_id, vectors, embedders })) } else { Ok(None)