mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Allow random access to fields in documents
This commit is contained in:
parent
65470e26e0
commit
5efd70c251
@ -176,6 +176,7 @@ mod test {
|
|||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
|
||||||
use crate::update::new::document::{DocumentFromVersions, Versions};
|
use crate::update::new::document::{DocumentFromVersions, Versions};
|
||||||
use crate::FieldsIdsMap;
|
use crate::FieldsIdsMap;
|
||||||
|
|
||||||
@ -212,6 +213,11 @@ mod test {
|
|||||||
max_positions_per_attributes: 1000,
|
max_positions_per_attributes: 1000,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let fields_ids_map = FieldIdMapWithMetadata::new(
|
||||||
|
fields_ids_map,
|
||||||
|
MetadataBuilder::new(Default::default(), Default::default(), Default::default(), None),
|
||||||
|
);
|
||||||
|
|
||||||
let fields_ids_map_lock = std::sync::RwLock::new(fields_ids_map);
|
let fields_ids_map_lock = std::sync::RwLock::new(fields_ids_map);
|
||||||
let mut global_fields_ids_map = GlobalFieldsIdsMap::new(&fields_ids_map_lock);
|
let mut global_fields_ids_map = GlobalFieldsIdsMap::new(&fields_ids_map_lock);
|
||||||
|
|
||||||
@ -223,7 +229,8 @@ mod test {
|
|||||||
let document: &RawValue = serde_json::from_str(&document).unwrap();
|
let document: &RawValue = serde_json::from_str(&document).unwrap();
|
||||||
let document = RawMap::from_raw_value(document, &bump).unwrap();
|
let document = RawMap::from_raw_value(document, &bump).unwrap();
|
||||||
|
|
||||||
let document = DocumentFromVersions::new(Versions::single(document));
|
let document = Versions::single(document);
|
||||||
|
let document = DocumentFromVersions::new(&document);
|
||||||
|
|
||||||
document_tokenizer
|
document_tokenizer
|
||||||
.tokenize_document(
|
.tokenize_document(
|
||||||
|
@ -7,6 +7,7 @@ use raw_collections::alloc::RefBump;
|
|||||||
use rayon::iter::IndexedParallelIterator;
|
use rayon::iter::IndexedParallelIterator;
|
||||||
|
|
||||||
use super::super::document_change::DocumentChange;
|
use super::super::document_change::DocumentChange;
|
||||||
|
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
|
||||||
use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _;
|
use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _;
|
||||||
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, Result};
|
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, Result};
|
||||||
|
|
||||||
@ -278,7 +279,7 @@ impl<
|
|||||||
pub fn new<F>(
|
pub fn new<F>(
|
||||||
index: &'indexer Index,
|
index: &'indexer Index,
|
||||||
db_fields_ids_map: &'indexer FieldsIdsMap,
|
db_fields_ids_map: &'indexer FieldsIdsMap,
|
||||||
new_fields_ids_map: &'fid RwLock<FieldsIdsMap>,
|
new_fields_ids_map: &'fid RwLock<FieldIdMapWithMetadata>,
|
||||||
extractor_allocs: &'extractor ThreadLocal<FullySend<RefCell<Bump>>>,
|
extractor_allocs: &'extractor ThreadLocal<FullySend<RefCell<Bump>>>,
|
||||||
doc_allocs: &'doc ThreadLocal<FullySend<Cell<Bump>>>,
|
doc_allocs: &'doc ThreadLocal<FullySend<Cell<Bump>>>,
|
||||||
datastore: &'data ThreadLocal<T>,
|
datastore: &'data ThreadLocal<T>,
|
||||||
@ -351,7 +352,7 @@ pub struct IndexingContext<
|
|||||||
> {
|
> {
|
||||||
pub index: &'index Index,
|
pub index: &'index Index,
|
||||||
pub db_fields_ids_map: &'indexer FieldsIdsMap,
|
pub db_fields_ids_map: &'indexer FieldsIdsMap,
|
||||||
pub new_fields_ids_map: &'fid RwLock<FieldsIdsMap>,
|
pub new_fields_ids_map: &'fid RwLock<FieldIdMapWithMetadata>,
|
||||||
pub doc_allocs: &'indexer ThreadLocal<FullySend<Cell<Bump>>>,
|
pub doc_allocs: &'indexer ThreadLocal<FullySend<Cell<Bump>>>,
|
||||||
pub fields_ids_map_store: &'indexer ThreadLocal<FullySend<RefCell<GlobalFieldsIdsMap<'fid>>>>,
|
pub fields_ids_map_store: &'indexer ThreadLocal<FullySend<RefCell<GlobalFieldsIdsMap<'fid>>>>,
|
||||||
}
|
}
|
||||||
|
@ -86,6 +86,7 @@ mod test {
|
|||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use raw_collections::alloc::RefBump;
|
use raw_collections::alloc::RefBump;
|
||||||
|
|
||||||
|
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
|
||||||
use crate::index::tests::TempIndex;
|
use crate::index::tests::TempIndex;
|
||||||
use crate::update::new::indexer::document_changes::{
|
use crate::update::new::indexer::document_changes::{
|
||||||
for_each_document_change, DocumentChangeContext, Extractor, IndexingContext, MostlySend,
|
for_each_document_change, DocumentChangeContext, Extractor, IndexingContext, MostlySend,
|
||||||
@ -144,7 +145,9 @@ mod test {
|
|||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
let fields_ids_map = RwLock::new(db_fields_ids_map.clone());
|
let metadata_builder = MetadataBuilder::from_index(&index, &rtxn).unwrap();
|
||||||
|
let fields_ids_map =
|
||||||
|
RwLock::new(FieldIdMapWithMetadata::new(db_fields_ids_map.clone(), metadata_builder));
|
||||||
|
|
||||||
let fields_ids_map_store = ThreadLocal::new();
|
let fields_ids_map_store = ThreadLocal::new();
|
||||||
|
|
||||||
|
@ -289,19 +289,17 @@ impl MergeChanges for MergeDocumentForReplacement {
|
|||||||
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
||||||
.map_err(UserError::SerdeJson)?;
|
.map_err(UserError::SerdeJson)?;
|
||||||
|
|
||||||
let document = DocumentFromVersions::new(Versions::single(document));
|
|
||||||
|
|
||||||
if is_new {
|
if is_new {
|
||||||
Ok(Some(DocumentChange::Insertion(Insertion::create(
|
Ok(Some(DocumentChange::Insertion(Insertion::create(
|
||||||
docid,
|
docid,
|
||||||
external_doc,
|
external_doc,
|
||||||
document,
|
Versions::single(document),
|
||||||
))))
|
))))
|
||||||
} else {
|
} else {
|
||||||
Ok(Some(DocumentChange::Update(Update::create(
|
Ok(Some(DocumentChange::Update(Update::create(
|
||||||
docid,
|
docid,
|
||||||
external_doc,
|
external_doc,
|
||||||
document,
|
Versions::single(document),
|
||||||
true,
|
true,
|
||||||
))))
|
))))
|
||||||
}
|
}
|
||||||
@ -396,15 +394,13 @@ impl MergeChanges for MergeDocumentForUpdates {
|
|||||||
|
|
||||||
let Some(versions) = versions else { return Ok(None) };
|
let Some(versions) = versions else { return Ok(None) };
|
||||||
|
|
||||||
let document = DocumentFromVersions::new(versions);
|
|
||||||
|
|
||||||
if is_new {
|
if is_new {
|
||||||
Ok(Some(DocumentChange::Insertion(Insertion::create(docid, external_docid, document))))
|
Ok(Some(DocumentChange::Insertion(Insertion::create(docid, external_docid, versions))))
|
||||||
} else {
|
} else {
|
||||||
Ok(Some(DocumentChange::Update(Update::create(
|
Ok(Some(DocumentChange::Update(Update::create(
|
||||||
docid,
|
docid,
|
||||||
external_docid,
|
external_docid,
|
||||||
document,
|
versions,
|
||||||
has_deletion,
|
has_deletion,
|
||||||
))))
|
))))
|
||||||
}
|
}
|
||||||
|
@ -28,6 +28,7 @@ use super::words_prefix_docids::{
|
|||||||
use super::{StdResult, TopLevelMap};
|
use super::{StdResult, TopLevelMap};
|
||||||
use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY};
|
use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
|
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
|
||||||
use crate::proximity::ProximityPrecision;
|
use crate::proximity::ProximityPrecision;
|
||||||
use crate::update::new::channel::ExtractorSender;
|
use crate::update::new::channel::ExtractorSender;
|
||||||
use crate::update::new::words_prefix_docids::compute_exact_word_prefix_docids;
|
use crate::update::new::words_prefix_docids::compute_exact_word_prefix_docids;
|
||||||
@ -122,6 +123,10 @@ where
|
|||||||
// This channel acts as a rendezvous point to ensure that we are one task ahead
|
// This channel acts as a rendezvous point to ensure that we are one task ahead
|
||||||
let (extractor_sender, merger_receiver) = extractors_merger_channels(4);
|
let (extractor_sender, merger_receiver) = extractors_merger_channels(4);
|
||||||
|
|
||||||
|
let metadata_builder = MetadataBuilder::from_index(index, wtxn)?;
|
||||||
|
|
||||||
|
let new_fields_ids_map = FieldIdMapWithMetadata::new(new_fields_ids_map, metadata_builder);
|
||||||
|
|
||||||
let new_fields_ids_map = RwLock::new(new_fields_ids_map);
|
let new_fields_ids_map = RwLock::new(new_fields_ids_map);
|
||||||
|
|
||||||
let fields_ids_map_store = ThreadLocal::with_capacity(pool.current_num_threads());
|
let fields_ids_map_store = ThreadLocal::with_capacity(pool.current_num_threads());
|
||||||
@ -298,8 +303,8 @@ where
|
|||||||
// required to into_inner the new_fields_ids_map
|
// required to into_inner the new_fields_ids_map
|
||||||
drop(fields_ids_map_store);
|
drop(fields_ids_map_store);
|
||||||
|
|
||||||
let fields_ids_map = new_fields_ids_map.into_inner().unwrap();
|
let new_fields_ids_map = new_fields_ids_map.into_inner().unwrap();
|
||||||
index.put_fields_ids_map(wtxn, &fields_ids_map)?;
|
index.put_fields_ids_map(wtxn, new_fields_ids_map.as_fields_ids_map())?;
|
||||||
|
|
||||||
if let Some(new_primary_key) = new_primary_key {
|
if let Some(new_primary_key) = new_primary_key {
|
||||||
index.put_primary_key(wtxn, new_primary_key.name())?;
|
index.put_primary_key(wtxn, new_primary_key.name())?;
|
||||||
|
@ -76,9 +76,7 @@ where
|
|||||||
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
|
||||||
.map_err(InternalError::SerdeJson)?;
|
.map_err(InternalError::SerdeJson)?;
|
||||||
|
|
||||||
let document = DocumentFromVersions::new(Versions::single(document));
|
let insertion = Insertion::create(docid, external_document_id, Versions::single(document));
|
||||||
|
|
||||||
let insertion = Insertion::create(docid, external_document_id, document);
|
|
||||||
Ok(Some(DocumentChange::Insertion(insertion)))
|
Ok(Some(DocumentChange::Insertion(insertion)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -160,12 +160,11 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> {
|
|||||||
} else {
|
} else {
|
||||||
let raw_new_doc = RawMap::from_raw_value(raw_new_doc, doc_alloc)
|
let raw_new_doc = RawMap::from_raw_value(raw_new_doc, doc_alloc)
|
||||||
.map_err(InternalError::SerdeJson)?;
|
.map_err(InternalError::SerdeJson)?;
|
||||||
let new_doc_version =
|
|
||||||
DocumentFromVersions::new(Versions::single(raw_new_doc));
|
|
||||||
Ok(Some(DocumentChange::Update(Update::create(
|
Ok(Some(DocumentChange::Update(Update::create(
|
||||||
docid,
|
docid,
|
||||||
new_document_id,
|
new_document_id,
|
||||||
new_doc_version,
|
Versions::single(raw_new_doc),
|
||||||
true, // It is like document replacement
|
true, // It is like document replacement
|
||||||
))))
|
))))
|
||||||
}
|
}
|
||||||
|
@ -4,12 +4,12 @@ use raw_collections::RawMap;
|
|||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
use super::document::{Document, DocumentFromDb};
|
use super::document::{Document, DocumentFromDb, DocumentFromVersions, Versions};
|
||||||
use crate::documents::FieldIdMapper;
|
use crate::documents::FieldIdMapper;
|
||||||
use crate::index::IndexEmbeddingConfig;
|
use crate::index::IndexEmbeddingConfig;
|
||||||
use crate::vector::parsed_vectors::RawVectors;
|
use crate::vector::parsed_vectors::RawVectors;
|
||||||
use crate::vector::Embedding;
|
use crate::vector::Embedding;
|
||||||
use crate::{DocumentId, Index, InternalError, Result};
|
use crate::{DocumentId, Index, InternalError, Result, UserError};
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
#[serde(untagged)]
|
#[serde(untagged)]
|
||||||
@ -17,6 +17,15 @@ pub enum Embeddings<'doc> {
|
|||||||
FromJson(&'doc RawValue),
|
FromJson(&'doc RawValue),
|
||||||
FromDb(Vec<Embedding>),
|
FromDb(Vec<Embedding>),
|
||||||
}
|
}
|
||||||
|
impl<'doc> Embeddings<'doc> {
|
||||||
|
pub fn into_vec(self) -> std::result::Result<Vec<Embedding>, serde_json::Error> {
|
||||||
|
match self {
|
||||||
|
/// FIXME: this should be a VecOrArrayOfVec
|
||||||
|
Embeddings::FromJson(value) => serde_json::from_str(value.get()),
|
||||||
|
Embeddings::FromDb(vec) => Ok(vec),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct VectorEntry<'doc> {
|
pub struct VectorEntry<'doc> {
|
||||||
pub has_configured_embedder: bool,
|
pub has_configured_embedder: bool,
|
||||||
@ -46,8 +55,10 @@ impl<'t> VectorDocumentFromDb<'t> {
|
|||||||
rtxn: &'t RoTxn,
|
rtxn: &'t RoTxn,
|
||||||
db_fields_ids_map: &'t Mapper,
|
db_fields_ids_map: &'t Mapper,
|
||||||
doc_alloc: &'t Bump,
|
doc_alloc: &'t Bump,
|
||||||
) -> Result<Self> {
|
) -> Result<Option<Self>> {
|
||||||
let document = DocumentFromDb::new(docid, rtxn, index, db_fields_ids_map)?.unwrap();
|
let Some(document) = DocumentFromDb::new(docid, rtxn, index, db_fields_ids_map)? else {
|
||||||
|
return Ok(None);
|
||||||
|
};
|
||||||
let vectors = document.vectors_field()?;
|
let vectors = document.vectors_field()?;
|
||||||
let vectors_field = match vectors {
|
let vectors_field = match vectors {
|
||||||
Some(vectors) => {
|
Some(vectors) => {
|
||||||
@ -58,7 +69,7 @@ impl<'t> VectorDocumentFromDb<'t> {
|
|||||||
|
|
||||||
let embedding_config = index.embedding_configs(rtxn)?;
|
let embedding_config = index.embedding_configs(rtxn)?;
|
||||||
|
|
||||||
Ok(Self { docid, embedding_config, index, vectors_field, rtxn, doc_alloc })
|
Ok(Some(Self { docid, embedding_config, index, vectors_field, rtxn, doc_alloc }))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn entry_from_db(
|
fn entry_from_db(
|
||||||
@ -132,3 +143,35 @@ fn entry_from_raw_value(
|
|||||||
regenerate: value.must_regenerate(),
|
regenerate: value.must_regenerate(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct VectorDocumentFromVersions<'doc> {
|
||||||
|
vectors: RawMap<'doc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'doc> VectorDocumentFromVersions<'doc> {
|
||||||
|
pub fn new(versions: &Versions<'doc>, bump: &'doc Bump) -> Result<Option<Self>> {
|
||||||
|
let document = DocumentFromVersions::new(versions);
|
||||||
|
if let Some(vectors_field) = document.vectors_field()? {
|
||||||
|
let vectors =
|
||||||
|
RawMap::from_raw_value(vectors_field, bump).map_err(UserError::SerdeJson)?;
|
||||||
|
Ok(Some(Self { vectors }))
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'doc> VectorDocument<'doc> for VectorDocumentFromVersions<'doc> {
|
||||||
|
fn iter_vectors(&self) -> impl Iterator<Item = Result<(&'doc str, VectorEntry<'doc>)>> {
|
||||||
|
self.vectors.iter().map(|(embedder, vectors)| {
|
||||||
|
let vectors = entry_from_raw_value(vectors).map_err(UserError::SerdeJson)?;
|
||||||
|
Ok((embedder, vectors))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn vectors_for_key(&self, key: &str) -> Result<Option<VectorEntry<'doc>>> {
|
||||||
|
let Some(vectors) = self.vectors.get(key) else { return Ok(None) };
|
||||||
|
let vectors = entry_from_raw_value(vectors).map_err(UserError::SerdeJson)?;
|
||||||
|
Ok(Some(vectors))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user