Use bumparaw-collections in Meilisearch/milli

This commit is contained in:
Kerollmops 2024-12-10 11:12:27 +01:00
parent 1995040846
commit 89637bcaaf
No known key found for this signature in database
GPG key ID: F250A4C4E3AE5F5F
15 changed files with 78 additions and 70 deletions

View file

@ -91,8 +91,8 @@ ureq = { version = "2.10.0", features = ["json"] }
url = "2.5.2"
rayon-par-bridge = "0.1.0"
hashbrown = "0.15.0"
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
bumpalo = "3.16.0"
bumparaw-collections = "0.1.1"
thread_local = "1.1.8"
allocator-api2 = "0.2.18"
rustc-hash = "2.0.0"

View file

@ -3,12 +3,12 @@ use std::collections::BTreeMap;
use std::fmt::{self, Debug};
use bumpalo::Bump;
use bumparaw_collections::{RawMap, RawVec, Value};
use liquid::model::{
ArrayView, DisplayCow, KString, KStringCow, ObjectRender, ObjectSource, ScalarCow, State,
Value as LiquidValue,
};
use liquid::{ObjectView, ValueView};
use raw_collections::{RawMap, RawVec};
use serde_json::value::RawValue;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
@ -245,12 +245,12 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc,
#[derive(Debug)]
struct ParseableValue<'doc> {
value: raw_collections::Value<'doc>,
value: Value<'doc>,
}
impl<'doc> ParseableValue<'doc> {
pub fn new(value: &'doc RawValue, doc_alloc: &'doc Bump) -> Self {
let value = raw_collections::Value::from_raw_value(value, doc_alloc).unwrap();
let value = Value::from_raw_value(value, doc_alloc).unwrap();
Self { value }
}
@ -447,8 +447,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn render(&self) -> DisplayCow<'_> {
use raw_collections::value::Number;
use raw_collections::Value;
use bumparaw_collections::value::Number;
use bumparaw_collections::Value;
match &self.value {
Value::Null => LiquidValue::Nil.render(),
Value::Bool(v) => v.render(),
@ -464,8 +465,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn source(&self) -> DisplayCow<'_> {
use raw_collections::value::Number;
use raw_collections::Value;
use bumparaw_collections::value::Number;
use bumparaw_collections::Value;
match &self.value {
Value::Null => LiquidValue::Nil.source(),
Value::Bool(v) => ValueView::source(v),
@ -481,8 +483,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn type_name(&self) -> &'static str {
use raw_collections::value::Number;
use raw_collections::Value;
use bumparaw_collections::value::Number;
use bumparaw_collections::Value;
match &self.value {
Value::Null => LiquidValue::Nil.type_name(),
Value::Bool(v) => v.type_name(),
@ -498,7 +501,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn query_state(&self, state: State) -> bool {
use raw_collections::Value;
use bumparaw_collections::Value;
match &self.value {
Value::Null => ValueView::query_state(&LiquidValue::Nil, state),
Value::Bool(v) => ValueView::query_state(v, state),
@ -515,7 +519,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn to_kstr(&self) -> KStringCow<'_> {
use raw_collections::Value;
use bumparaw_collections::Value;
match &self.value {
Value::Null => ValueView::to_kstr(&LiquidValue::Nil),
Value::Bool(v) => ValueView::to_kstr(v),
@ -527,12 +532,14 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn to_value(&self) -> LiquidValue {
use raw_collections::Value;
use bumparaw_collections::value::Number;
use bumparaw_collections::Value;
match &self.value {
Value::Null => LiquidValue::Nil,
Value::Bool(v) => LiquidValue::Scalar(liquid::model::ScalarCow::new(*v)),
Value::Number(number) => match number {
raw_collections::value::Number::PosInt(number) => {
Number::PosInt(number) => {
let number: i64 = match (*number).try_into() {
Ok(number) => number,
Err(_) => {
@ -541,12 +548,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
};
LiquidValue::Scalar(ScalarCow::new(number))
}
raw_collections::value::Number::NegInt(number) => {
LiquidValue::Scalar(ScalarCow::new(*number))
}
raw_collections::value::Number::Finite(number) => {
LiquidValue::Scalar(ScalarCow::new(*number))
}
Number::NegInt(number) => LiquidValue::Scalar(ScalarCow::new(*number)),
Number::Finite(number) => LiquidValue::Scalar(ScalarCow::new(*number)),
},
Value::String(s) => LiquidValue::Scalar(liquid::model::ScalarCow::new(s.to_string())),
Value::Array(raw_vec) => ParseableArray::as_parseable(raw_vec).to_value(),
@ -555,8 +558,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn as_scalar(&self) -> Option<liquid::model::ScalarCow<'_>> {
use raw_collections::value::Number;
use raw_collections::Value;
use bumparaw_collections::value::Number;
use bumparaw_collections::Value;
match &self.value {
Value::Bool(v) => Some(liquid::model::ScalarCow::new(*v)),
Value::Number(number) => match number {
@ -576,34 +580,35 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn is_scalar(&self) -> bool {
use raw_collections::Value;
use bumparaw_collections::Value;
matches!(&self.value, Value::Bool(_) | Value::Number(_) | Value::String(_))
}
fn as_array(&self) -> Option<&dyn liquid::model::ArrayView> {
if let raw_collections::Value::Array(array) = &self.value {
if let Value::Array(array) = &self.value {
return Some(ParseableArray::as_parseable(array) as _);
}
None
}
fn is_array(&self) -> bool {
matches!(&self.value, raw_collections::Value::Array(_))
matches!(&self.value, bumparaw_collections::Value::Array(_))
}
fn as_object(&self) -> Option<&dyn ObjectView> {
if let raw_collections::Value::Object(object) = &self.value {
if let Value::Object(object) = &self.value {
return Some(ParseableMap::as_parseable(object) as _);
}
None
}
fn is_object(&self) -> bool {
matches!(&self.value, raw_collections::Value::Object(_))
matches!(&self.value, bumparaw_collections::Value::Object(_))
}
fn is_nil(&self) -> bool {
matches!(&self.value, raw_collections::Value::Null)
matches!(&self.value, bumparaw_collections::Value::Null)
}
}

View file

@ -1,7 +1,7 @@
use std::collections::{BTreeMap, BTreeSet};
use bumparaw_collections::RawMap;
use heed::RoTxn;
use raw_collections::RawMap;
use serde_json::value::RawValue;
use super::vector_document::VectorDocument;

View file

@ -69,12 +69,12 @@ use std::io::BufReader;
use std::{io, iter, mem};
use bumpalo::Bump;
use bumparaw_collections::bbbul::{BitPacker, BitPacker4x};
use bumparaw_collections::map::FrozenMap;
use bumparaw_collections::{Bbbul, FrozenBbbul};
use grenad::ReaderCursor;
use hashbrown::hash_map::RawEntryMut;
use hashbrown::HashMap;
use raw_collections::bbbul::{BitPacker, BitPacker4x};
use raw_collections::map::FrozenMap;
use raw_collections::{Bbbul, FrozenBbbul};
use roaring::RoaringBitmap;
use rustc_hash::FxBuildHasher;

View file

@ -176,9 +176,9 @@ pub fn tokenizer_builder<'a>(
#[cfg(test)]
mod test {
use bumpalo::Bump;
use bumparaw_collections::RawMap;
use charabia::TokenizerBuilder;
use meili_snap::snapshot;
use raw_collections::RawMap;
use serde_json::json;
use serde_json::value::RawValue;

View file

@ -1,6 +1,7 @@
use std::ops::ControlFlow;
use bumpalo::Bump;
use bumparaw_collections::RawVec;
use serde::de::{DeserializeSeed, Deserializer as _, Visitor};
use serde_json::value::RawValue;
@ -360,7 +361,7 @@ impl<'a> DeserrRawValue<'a> {
}
pub struct DeserrRawVec<'a> {
vec: raw_collections::RawVec<'a>,
vec: RawVec<'a>,
alloc: &'a Bump,
}
@ -379,7 +380,7 @@ impl<'a> deserr::Sequence for DeserrRawVec<'a> {
}
pub struct DeserrRawVecIter<'a> {
it: raw_collections::vec::iter::IntoIter<'a>,
it: bumparaw_collections::vec::iter::IntoIter<'a>,
alloc: &'a Bump,
}
@ -393,7 +394,7 @@ impl<'a> Iterator for DeserrRawVecIter<'a> {
}
pub struct DeserrRawMap<'a> {
map: raw_collections::RawMap<'a>,
map: bumparaw_collections::RawMap<'a>,
alloc: &'a Bump,
}
@ -416,7 +417,7 @@ impl<'a> deserr::Map for DeserrRawMap<'a> {
}
pub struct DeserrRawMapIter<'a> {
it: raw_collections::map::iter::IntoIter<'a>,
it: bumparaw_collections::map::iter::IntoIter<'a>,
alloc: &'a Bump,
}
@ -615,7 +616,7 @@ impl<'de> Visitor<'de> for DeserrRawValueVisitor<'de> {
where
A: serde::de::SeqAccess<'de>,
{
let mut raw_vec = raw_collections::RawVec::new_in(self.alloc);
let mut raw_vec = RawVec::new_in(self.alloc);
while let Some(next) = seq.next_element()? {
raw_vec.push(next);
}

View file

@ -1,9 +1,9 @@
use bumpalo::collections::CollectIn;
use bumpalo::Bump;
use bumparaw_collections::RawMap;
use hashbrown::hash_map::Entry;
use heed::RoTxn;
use memmap2::Mmap;
use raw_collections::RawMap;
use rayon::slice::ParallelSlice;
use serde_json::value::RawValue;
use serde_json::Deserializer;
@ -545,8 +545,8 @@ impl MergeChanges for MergeDocumentForReplacement {
match operations.last() {
Some(InnerDocOp::Addition(DocumentOffset { content })) => {
let document = serde_json::from_slice(content).unwrap();
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
.map_err(UserError::SerdeJson)?;
let document =
RawMap::from_raw_value(document, doc_alloc).map_err(UserError::SerdeJson)?;
if is_new {
Ok(Some(DocumentChange::Insertion(Insertion::create(
@ -632,8 +632,8 @@ impl MergeChanges for MergeDocumentForUpdates {
}
};
let document = serde_json::from_slice(content).unwrap();
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
.map_err(UserError::SerdeJson)?;
let document =
RawMap::from_raw_value(document, doc_alloc).map_err(UserError::SerdeJson)?;
Some(Versions::single(document))
}
@ -647,7 +647,7 @@ impl MergeChanges for MergeDocumentForUpdates {
};
let document = serde_json::from_slice(content).unwrap();
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
let document = RawMap::from_raw_value(document, doc_alloc)
.map_err(UserError::SerdeJson)?;
Ok(document)
});

View file

@ -4,6 +4,7 @@ use std::sync::{OnceLock, RwLock};
use std::thread::{self, Builder};
use big_s::S;
use bumparaw_collections::RawMap;
use document_changes::{extract, DocumentChanges, IndexingContext, Progress};
pub use document_deletion::DocumentDeletion;
pub use document_operation::{DocumentOperation, PayloadStats};
@ -13,7 +14,6 @@ use heed::{RoTxn, RwTxn};
use itertools::{merge_join_by, EitherOrBoth};
pub use partial_dump::PartialDump;
use rand::SeedableRng as _;
use raw_collections::RawMap;
use time::OffsetDateTime;
pub use update_by_function::UpdateByFunction;

View file

@ -1,5 +1,6 @@
use std::ops::DerefMut;
use bumparaw_collections::RawMap;
use rayon::iter::IndexedParallelIterator;
use serde_json::value::RawValue;
@ -75,8 +76,8 @@ where
self.primary_key.extract_fields_and_docid(document, fields_ids_map, doc_alloc)?;
let external_document_id = external_document_id.to_de();
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
.map_err(InternalError::SerdeJson)?;
let document =
RawMap::from_raw_value(document, doc_alloc).map_err(InternalError::SerdeJson)?;
let insertion = Insertion::create(docid, external_document_id, Versions::single(document));
Ok(Some(DocumentChange::Insertion(insertion)))

View file

@ -1,4 +1,4 @@
use raw_collections::RawMap;
use bumparaw_collections::RawMap;
use rayon::iter::IndexedParallelIterator;
use rayon::slice::ParallelSlice as _;
use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST};

View file

@ -1,9 +1,9 @@
use std::collections::BTreeSet;
use bumpalo::Bump;
use bumparaw_collections::RawMap;
use deserr::{Deserr, IntoValue};
use heed::RoTxn;
use raw_collections::RawMap;
use serde::Serialize;
use serde_json::value::RawValue;