mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 05:54:30 +01:00
Prefer using a stable hash builder over a random one
This commit is contained in:
parent
6b269795d2
commit
a751972c57
@ -2,6 +2,7 @@ use std::collections::{BTreeMap, BTreeSet};
|
|||||||
|
|
||||||
use bumparaw_collections::RawMap;
|
use bumparaw_collections::RawMap;
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
|
use rustc_hash::FxBuildHasher;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
use super::vector_document::VectorDocument;
|
use super::vector_document::VectorDocument;
|
||||||
@ -385,12 +386,12 @@ pub type Entry<'doc> = (&'doc str, &'doc RawValue);
|
|||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Versions<'doc> {
|
pub struct Versions<'doc> {
|
||||||
data: RawMap<'doc>,
|
data: RawMap<'doc, FxBuildHasher>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'doc> Versions<'doc> {
|
impl<'doc> Versions<'doc> {
|
||||||
pub fn multiple(
|
pub fn multiple(
|
||||||
mut versions: impl Iterator<Item = Result<RawMap<'doc>>>,
|
mut versions: impl Iterator<Item = Result<RawMap<'doc, FxBuildHasher>>>,
|
||||||
) -> Result<Option<Self>> {
|
) -> Result<Option<Self>> {
|
||||||
let Some(data) = versions.next() else { return Ok(None) };
|
let Some(data) = versions.next() else { return Ok(None) };
|
||||||
let mut data = data?;
|
let mut data = data?;
|
||||||
@ -403,7 +404,7 @@ impl<'doc> Versions<'doc> {
|
|||||||
Ok(Some(Self::single(data)))
|
Ok(Some(Self::single(data)))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn single(version: RawMap<'doc>) -> Self {
|
pub fn single(version: RawMap<'doc, FxBuildHasher>) -> Self {
|
||||||
Self { data: version }
|
Self { data: version }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -179,6 +179,7 @@ mod test {
|
|||||||
use bumparaw_collections::RawMap;
|
use bumparaw_collections::RawMap;
|
||||||
use charabia::TokenizerBuilder;
|
use charabia::TokenizerBuilder;
|
||||||
use meili_snap::snapshot;
|
use meili_snap::snapshot;
|
||||||
|
use rustc_hash::FxBuildHasher;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
@ -234,7 +235,7 @@ mod test {
|
|||||||
|
|
||||||
let bump = Bump::new();
|
let bump = Bump::new();
|
||||||
let document: &RawValue = serde_json::from_str(&document).unwrap();
|
let document: &RawValue = serde_json::from_str(&document).unwrap();
|
||||||
let document = RawMap::from_raw_value(document, &bump).unwrap();
|
let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, &bump).unwrap();
|
||||||
|
|
||||||
let document = Versions::single(document);
|
let document = Versions::single(document);
|
||||||
let document = DocumentFromVersions::new(&document);
|
let document = DocumentFromVersions::new(&document);
|
||||||
|
@ -2,6 +2,7 @@ use std::ops::ControlFlow;
|
|||||||
|
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use bumparaw_collections::RawVec;
|
use bumparaw_collections::RawVec;
|
||||||
|
use rustc_hash::FxBuildHasher;
|
||||||
use serde::de::{DeserializeSeed, Deserializer as _, Visitor};
|
use serde::de::{DeserializeSeed, Deserializer as _, Visitor};
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
@ -394,7 +395,7 @@ impl<'a> Iterator for DeserrRawVecIter<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct DeserrRawMap<'a> {
|
pub struct DeserrRawMap<'a> {
|
||||||
map: bumparaw_collections::RawMap<'a>,
|
map: bumparaw_collections::RawMap<'a, FxBuildHasher>,
|
||||||
alloc: &'a Bump,
|
alloc: &'a Bump,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@ use hashbrown::hash_map::Entry;
|
|||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use memmap2::Mmap;
|
use memmap2::Mmap;
|
||||||
use rayon::slice::ParallelSlice;
|
use rayon::slice::ParallelSlice;
|
||||||
|
use rustc_hash::FxBuildHasher;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
use serde_json::Deserializer;
|
use serde_json::Deserializer;
|
||||||
|
|
||||||
@ -166,8 +167,9 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
|
|||||||
|
|
||||||
// Only guess the primary key if it is the first document
|
// Only guess the primary key if it is the first document
|
||||||
let retrieved_primary_key = if previous_offset == 0 {
|
let retrieved_primary_key = if previous_offset == 0 {
|
||||||
let doc =
|
let doc = RawMap::from_raw_value_and_hasher(doc, FxBuildHasher, indexer)
|
||||||
RawMap::from_raw_value(doc, indexer).map(Some).map_err(UserError::SerdeJson)?;
|
.map(Some)
|
||||||
|
.map_err(UserError::SerdeJson)?;
|
||||||
|
|
||||||
let result = retrieve_or_guess_primary_key(
|
let result = retrieve_or_guess_primary_key(
|
||||||
rtxn,
|
rtxn,
|
||||||
@ -546,7 +548,8 @@ impl MergeChanges for MergeDocumentForReplacement {
|
|||||||
Some(InnerDocOp::Addition(DocumentOffset { content })) => {
|
Some(InnerDocOp::Addition(DocumentOffset { content })) => {
|
||||||
let document = serde_json::from_slice(content).unwrap();
|
let document = serde_json::from_slice(content).unwrap();
|
||||||
let document =
|
let document =
|
||||||
RawMap::from_raw_value(document, doc_alloc).map_err(UserError::SerdeJson)?;
|
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
|
||||||
|
.map_err(UserError::SerdeJson)?;
|
||||||
|
|
||||||
if is_new {
|
if is_new {
|
||||||
Ok(Some(DocumentChange::Insertion(Insertion::create(
|
Ok(Some(DocumentChange::Insertion(Insertion::create(
|
||||||
@ -633,7 +636,8 @@ impl MergeChanges for MergeDocumentForUpdates {
|
|||||||
};
|
};
|
||||||
let document = serde_json::from_slice(content).unwrap();
|
let document = serde_json::from_slice(content).unwrap();
|
||||||
let document =
|
let document =
|
||||||
RawMap::from_raw_value(document, doc_alloc).map_err(UserError::SerdeJson)?;
|
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
|
||||||
|
.map_err(UserError::SerdeJson)?;
|
||||||
|
|
||||||
Some(Versions::single(document))
|
Some(Versions::single(document))
|
||||||
}
|
}
|
||||||
@ -647,8 +651,9 @@ impl MergeChanges for MergeDocumentForUpdates {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let document = serde_json::from_slice(content).unwrap();
|
let document = serde_json::from_slice(content).unwrap();
|
||||||
let document = RawMap::from_raw_value(document, doc_alloc)
|
let document =
|
||||||
.map_err(UserError::SerdeJson)?;
|
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
|
||||||
|
.map_err(UserError::SerdeJson)?;
|
||||||
Ok(document)
|
Ok(document)
|
||||||
});
|
});
|
||||||
Versions::multiple(versions)?
|
Versions::multiple(versions)?
|
||||||
|
@ -14,6 +14,7 @@ use heed::{RoTxn, RwTxn};
|
|||||||
use itertools::{merge_join_by, EitherOrBoth};
|
use itertools::{merge_join_by, EitherOrBoth};
|
||||||
pub use partial_dump::PartialDump;
|
pub use partial_dump::PartialDump;
|
||||||
use rand::SeedableRng as _;
|
use rand::SeedableRng as _;
|
||||||
|
use rustc_hash::FxBuildHasher;
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
pub use update_by_function::UpdateByFunction;
|
pub use update_by_function::UpdateByFunction;
|
||||||
|
|
||||||
@ -776,7 +777,7 @@ pub fn retrieve_or_guess_primary_key<'a>(
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
new_fields_ids_map: &mut FieldsIdsMap,
|
new_fields_ids_map: &mut FieldsIdsMap,
|
||||||
primary_key_from_op: Option<&'a str>,
|
primary_key_from_op: Option<&'a str>,
|
||||||
first_document: Option<RawMap<'a>>,
|
first_document: Option<RawMap<'a, FxBuildHasher>>,
|
||||||
) -> Result<StdResult<(PrimaryKey<'a>, bool), UserError>> {
|
) -> Result<StdResult<(PrimaryKey<'a>, bool), UserError>> {
|
||||||
// make sure that we have a declared primary key, either fetching it from the index or attempting to guess it.
|
// make sure that we have a declared primary key, either fetching it from the index or attempting to guess it.
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@ use std::ops::DerefMut;
|
|||||||
|
|
||||||
use bumparaw_collections::RawMap;
|
use bumparaw_collections::RawMap;
|
||||||
use rayon::iter::IndexedParallelIterator;
|
use rayon::iter::IndexedParallelIterator;
|
||||||
|
use rustc_hash::FxBuildHasher;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
use super::document_changes::{DocumentChangeContext, DocumentChanges};
|
use super::document_changes::{DocumentChangeContext, DocumentChanges};
|
||||||
@ -76,8 +77,8 @@ where
|
|||||||
self.primary_key.extract_fields_and_docid(document, fields_ids_map, doc_alloc)?;
|
self.primary_key.extract_fields_and_docid(document, fields_ids_map, doc_alloc)?;
|
||||||
let external_document_id = external_document_id.to_de();
|
let external_document_id = external_document_id.to_de();
|
||||||
|
|
||||||
let document =
|
let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
|
||||||
RawMap::from_raw_value(document, doc_alloc).map_err(InternalError::SerdeJson)?;
|
.map_err(InternalError::SerdeJson)?;
|
||||||
|
|
||||||
let insertion = Insertion::create(docid, external_document_id, Versions::single(document));
|
let insertion = Insertion::create(docid, external_document_id, Versions::single(document));
|
||||||
Ok(Some(DocumentChange::Insertion(insertion)))
|
Ok(Some(DocumentChange::Insertion(insertion)))
|
||||||
|
@ -3,6 +3,7 @@ use rayon::iter::IndexedParallelIterator;
|
|||||||
use rayon::slice::ParallelSlice as _;
|
use rayon::slice::ParallelSlice as _;
|
||||||
use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST};
|
use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
use rustc_hash::FxBuildHasher;
|
||||||
|
|
||||||
use super::document_changes::DocumentChangeContext;
|
use super::document_changes::DocumentChangeContext;
|
||||||
use super::DocumentChanges;
|
use super::DocumentChanges;
|
||||||
@ -160,8 +161,12 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> {
|
|||||||
if document_id != new_document_id {
|
if document_id != new_document_id {
|
||||||
Err(Error::UserError(UserError::DocumentEditionCannotModifyPrimaryKey))
|
Err(Error::UserError(UserError::DocumentEditionCannotModifyPrimaryKey))
|
||||||
} else {
|
} else {
|
||||||
let raw_new_doc = RawMap::from_raw_value(raw_new_doc, doc_alloc)
|
let raw_new_doc = RawMap::from_raw_value_and_hasher(
|
||||||
.map_err(InternalError::SerdeJson)?;
|
raw_new_doc,
|
||||||
|
FxBuildHasher,
|
||||||
|
doc_alloc,
|
||||||
|
)
|
||||||
|
.map_err(InternalError::SerdeJson)?;
|
||||||
|
|
||||||
Ok(Some(DocumentChange::Update(Update::create(
|
Ok(Some(DocumentChange::Update(Update::create(
|
||||||
docid,
|
docid,
|
||||||
|
@ -4,6 +4,7 @@ use bumpalo::Bump;
|
|||||||
use bumparaw_collections::RawMap;
|
use bumparaw_collections::RawMap;
|
||||||
use deserr::{Deserr, IntoValue};
|
use deserr::{Deserr, IntoValue};
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
|
use rustc_hash::FxBuildHasher;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
@ -84,7 +85,7 @@ pub struct VectorDocumentFromDb<'t> {
|
|||||||
docid: DocumentId,
|
docid: DocumentId,
|
||||||
embedding_config: Vec<IndexEmbeddingConfig>,
|
embedding_config: Vec<IndexEmbeddingConfig>,
|
||||||
index: &'t Index,
|
index: &'t Index,
|
||||||
vectors_field: Option<RawMap<'t>>,
|
vectors_field: Option<RawMap<'t, FxBuildHasher>>,
|
||||||
rtxn: &'t RoTxn<'t>,
|
rtxn: &'t RoTxn<'t>,
|
||||||
doc_alloc: &'t Bump,
|
doc_alloc: &'t Bump,
|
||||||
}
|
}
|
||||||
@ -102,9 +103,10 @@ impl<'t> VectorDocumentFromDb<'t> {
|
|||||||
};
|
};
|
||||||
let vectors = document.vectors_field()?;
|
let vectors = document.vectors_field()?;
|
||||||
let vectors_field = match vectors {
|
let vectors_field = match vectors {
|
||||||
Some(vectors) => {
|
Some(vectors) => Some(
|
||||||
Some(RawMap::from_raw_value(vectors, doc_alloc).map_err(InternalError::SerdeJson)?)
|
RawMap::from_raw_value_and_hasher(vectors, FxBuildHasher, doc_alloc)
|
||||||
}
|
.map_err(InternalError::SerdeJson)?,
|
||||||
|
),
|
||||||
None => None,
|
None => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -220,7 +222,7 @@ fn entry_from_raw_value(
|
|||||||
|
|
||||||
pub struct VectorDocumentFromVersions<'doc> {
|
pub struct VectorDocumentFromVersions<'doc> {
|
||||||
external_document_id: &'doc str,
|
external_document_id: &'doc str,
|
||||||
vectors: RawMap<'doc>,
|
vectors: RawMap<'doc, FxBuildHasher>,
|
||||||
embedders: &'doc EmbeddingConfigs,
|
embedders: &'doc EmbeddingConfigs,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -233,8 +235,8 @@ impl<'doc> VectorDocumentFromVersions<'doc> {
|
|||||||
) -> Result<Option<Self>> {
|
) -> Result<Option<Self>> {
|
||||||
let document = DocumentFromVersions::new(versions);
|
let document = DocumentFromVersions::new(versions);
|
||||||
if let Some(vectors_field) = document.vectors_field()? {
|
if let Some(vectors_field) = document.vectors_field()? {
|
||||||
let vectors =
|
let vectors = RawMap::from_raw_value_and_hasher(vectors_field, FxBuildHasher, bump)
|
||||||
RawMap::from_raw_value(vectors_field, bump).map_err(UserError::SerdeJson)?;
|
.map_err(UserError::SerdeJson)?;
|
||||||
Ok(Some(Self { external_document_id, vectors, embedders }))
|
Ok(Some(Self { external_document_id, vectors, embedders }))
|
||||||
} else {
|
} else {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user