mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Write into documents
This commit is contained in:
parent
9cbb2b066a
commit
7058959a46
@ -1,13 +1,14 @@
|
|||||||
use std::collections::BTreeSet;
|
use std::collections::{BTreeMap, BTreeSet};
|
||||||
|
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use raw_collections::RawMap;
|
use raw_collections::RawMap;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
|
use super::vector_document::{VectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions};
|
||||||
use super::{KvReaderFieldId, KvWriterFieldId};
|
use super::{KvReaderFieldId, KvWriterFieldId};
|
||||||
use crate::documents::FieldIdMapper;
|
use crate::documents::FieldIdMapper;
|
||||||
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
|
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
|
||||||
use crate::{DocumentId, Index, InternalError, Result};
|
use crate::{DocumentId, GlobalFieldsIdsMap, Index, InternalError, Result, UserError};
|
||||||
|
|
||||||
/// A view into a document that can represent either the current version from the DB,
|
/// A view into a document that can represent either the current version from the DB,
|
||||||
/// the update data from payload or other means, or the merged updated version.
|
/// the update data from payload or other means, or the merged updated version.
|
||||||
@ -69,17 +70,22 @@ impl<'t, Mapper: FieldIdMapper> Document<'t> for DocumentFromDb<'t, Mapper> {
|
|||||||
std::iter::from_fn(move || {
|
std::iter::from_fn(move || {
|
||||||
let (fid, value) = it.next()?;
|
let (fid, value) = it.next()?;
|
||||||
|
|
||||||
let res = (|| {
|
let res = (|| loop {
|
||||||
let value =
|
|
||||||
serde_json::from_slice(value).map_err(crate::InternalError::SerdeJson)?;
|
|
||||||
|
|
||||||
let name = self.fields_ids_map.name(fid).ok_or(
|
let name = self.fields_ids_map.name(fid).ok_or(
|
||||||
InternalError::FieldIdMapMissingEntry(crate::FieldIdMapMissingEntry::FieldId {
|
InternalError::FieldIdMapMissingEntry(crate::FieldIdMapMissingEntry::FieldId {
|
||||||
field_id: fid,
|
field_id: fid,
|
||||||
process: "getting current document",
|
process: "getting current document",
|
||||||
}),
|
}),
|
||||||
)?;
|
)?;
|
||||||
Ok((name, value))
|
|
||||||
|
if name == RESERVED_VECTORS_FIELD_NAME || name == "_geo" {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let value =
|
||||||
|
serde_json::from_slice(value).map_err(crate::InternalError::SerdeJson)?;
|
||||||
|
|
||||||
|
return Ok((name, value));
|
||||||
})();
|
})();
|
||||||
|
|
||||||
Some(res)
|
Some(res)
|
||||||
@ -164,13 +170,6 @@ pub struct MergedDocument<'a, 'doc, 't, Mapper: FieldIdMapper> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, 'doc, 't, Mapper: FieldIdMapper> MergedDocument<'a, 'doc, 't, Mapper> {
|
impl<'a, 'doc, 't, Mapper: FieldIdMapper> MergedDocument<'a, 'doc, 't, Mapper> {
|
||||||
pub fn new(
|
|
||||||
new_doc: DocumentFromVersions<'a, 'doc>,
|
|
||||||
db: Option<DocumentFromDb<'t, Mapper>>,
|
|
||||||
) -> Self {
|
|
||||||
Self { new_doc, db }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn with_db(
|
pub fn with_db(
|
||||||
docid: DocumentId,
|
docid: DocumentId,
|
||||||
rtxn: &'t RoTxn,
|
rtxn: &'t RoTxn,
|
||||||
@ -287,15 +286,14 @@ where
|
|||||||
///
|
///
|
||||||
/// - If the document contains a top-level field that is not present in `fields_ids_map`.
|
/// - If the document contains a top-level field that is not present in `fields_ids_map`.
|
||||||
///
|
///
|
||||||
pub fn write_to_obkv<'s, 'a, 'b>(
|
pub fn write_to_obkv<'s, 'a, 'map>(
|
||||||
document: &'s impl Document<'s>,
|
document: &'s impl Document<'s>,
|
||||||
vector_document: Option<()>,
|
vector_document: Option<&'s impl VectorDocument<'s>>,
|
||||||
fields_ids_map: &'a impl FieldIdMapper,
|
fields_ids_map: &'a mut GlobalFieldsIdsMap<'map>,
|
||||||
mut document_buffer: &'a mut Vec<u8>,
|
mut document_buffer: &'a mut Vec<u8>,
|
||||||
) -> Result<&'a KvReaderFieldId>
|
) -> Result<&'a KvReaderFieldId>
|
||||||
where
|
where
|
||||||
's: 'a,
|
's: 'a,
|
||||||
's: 'b,
|
|
||||||
{
|
{
|
||||||
// will be used in 'inject_vectors
|
// will be used in 'inject_vectors
|
||||||
let vectors_value: Box<RawValue>;
|
let vectors_value: Box<RawValue>;
|
||||||
@ -308,19 +306,21 @@ where
|
|||||||
|
|
||||||
for res in document.iter_top_level_fields() {
|
for res in document.iter_top_level_fields() {
|
||||||
let (field_name, value) = res?;
|
let (field_name, value) = res?;
|
||||||
let field_id = fields_ids_map.id(field_name).unwrap();
|
let field_id =
|
||||||
|
fields_ids_map.id_or_insert(field_name).ok_or(UserError::AttributeLimitReached)?;
|
||||||
unordered_field_buffer.push((field_id, value));
|
unordered_field_buffer.push((field_id, value));
|
||||||
}
|
}
|
||||||
|
|
||||||
'inject_vectors: {
|
'inject_vectors: {
|
||||||
let Some(vector_document) = vector_document else { break 'inject_vectors };
|
let Some(vector_document) = vector_document else { break 'inject_vectors };
|
||||||
|
|
||||||
let Some(vectors_fid) = fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME) else {
|
let vectors_fid = fields_ids_map
|
||||||
break 'inject_vectors;
|
.id_or_insert(RESERVED_VECTORS_FIELD_NAME)
|
||||||
};
|
.ok_or(UserError::AttributeLimitReached)?;
|
||||||
/*
|
|
||||||
let mut vectors = BTreeMap::new();
|
let mut vectors = BTreeMap::new();
|
||||||
for (name, entry) in vector_document.iter_vectors() {
|
for res in vector_document.iter_vectors() {
|
||||||
|
let (name, entry) = res?;
|
||||||
if entry.has_configured_embedder {
|
if entry.has_configured_embedder {
|
||||||
continue; // we don't write vectors with configured embedder in documents
|
continue; // we don't write vectors with configured embedder in documents
|
||||||
}
|
}
|
||||||
@ -335,7 +335,7 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
vectors_value = serde_json::value::to_raw_value(&vectors).unwrap();
|
vectors_value = serde_json::value::to_raw_value(&vectors).unwrap();
|
||||||
unordered_field_buffer.push((vectors_fid, &vectors_value));*/
|
unordered_field_buffer.push((vectors_fid, &vectors_value));
|
||||||
}
|
}
|
||||||
|
|
||||||
unordered_field_buffer.sort_by_key(|(fid, _)| *fid);
|
unordered_field_buffer.sort_by_key(|(fid, _)| *fid);
|
||||||
@ -373,9 +373,8 @@ impl<'doc> Versions<'doc> {
|
|||||||
Self { data: version }
|
Self { data: version }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn iter_top_level_fields(&self) -> raw_collections::map::iter::Iter<'doc, '_> {
|
pub fn iter_top_level_fields(&self) -> impl Iterator<Item = (&'doc str, &'doc RawValue)> + '_ {
|
||||||
/// FIXME: ignore vectors and _geo
|
self.data.iter().filter(|(k, _)| *k != RESERVED_VECTORS_FIELD_NAME && *k != "_geo")
|
||||||
self.data.iter()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn vectors_field(&self) -> Option<&'doc RawValue> {
|
pub fn vectors_field(&self) -> Option<&'doc RawValue> {
|
||||||
|
@ -2,7 +2,9 @@ use bumpalo::Bump;
|
|||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
|
|
||||||
use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument, Versions};
|
use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument, Versions};
|
||||||
use super::vector_document::{VectorDocumentFromDb, VectorDocumentFromVersions};
|
use super::vector_document::{
|
||||||
|
MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions,
|
||||||
|
};
|
||||||
use crate::documents::FieldIdMapper;
|
use crate::documents::FieldIdMapper;
|
||||||
use crate::{DocumentId, Index, Result};
|
use crate::{DocumentId, Index, Result};
|
||||||
|
|
||||||
@ -85,7 +87,7 @@ impl<'doc> Insertion<'doc> {
|
|||||||
pub fn external_document_id(&self) -> &'doc str {
|
pub fn external_document_id(&self) -> &'doc str {
|
||||||
self.external_document_id
|
self.external_document_id
|
||||||
}
|
}
|
||||||
pub fn new(&self) -> DocumentFromVersions<'_, 'doc> {
|
pub fn inserted(&self) -> DocumentFromVersions<'_, 'doc> {
|
||||||
DocumentFromVersions::new(&self.new)
|
DocumentFromVersions::new(&self.new)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -141,7 +143,7 @@ impl<'doc> Update<'doc> {
|
|||||||
DocumentFromVersions::new(&self.new)
|
DocumentFromVersions::new(&self.new)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new<'t, Mapper: FieldIdMapper>(
|
pub fn merged<'t, Mapper: FieldIdMapper>(
|
||||||
&self,
|
&self,
|
||||||
rtxn: &'t RoTxn,
|
rtxn: &'t RoTxn,
|
||||||
index: &'t Index,
|
index: &'t Index,
|
||||||
@ -166,4 +168,18 @@ impl<'doc> Update<'doc> {
|
|||||||
) -> Result<Option<VectorDocumentFromVersions<'doc>>> {
|
) -> Result<Option<VectorDocumentFromVersions<'doc>>> {
|
||||||
VectorDocumentFromVersions::new(&self.new, doc_alloc)
|
VectorDocumentFromVersions::new(&self.new, doc_alloc)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn merged_vectors<Mapper: FieldIdMapper>(
|
||||||
|
&self,
|
||||||
|
rtxn: &'doc RoTxn,
|
||||||
|
index: &'doc Index,
|
||||||
|
mapper: &'doc Mapper,
|
||||||
|
doc_alloc: &'doc Bump,
|
||||||
|
) -> Result<Option<MergedVectorDocument<'doc>>> {
|
||||||
|
if self.has_deletion {
|
||||||
|
MergedVectorDocument::without_db(&self.new, doc_alloc)
|
||||||
|
} else {
|
||||||
|
MergedVectorDocument::with_db(self.docid, index, rtxn, mapper, &self.new, doc_alloc)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -120,7 +120,7 @@ impl FacetedDocidsExtractor {
|
|||||||
|
|
||||||
extract_document_facets(
|
extract_document_facets(
|
||||||
attributes_to_extract,
|
attributes_to_extract,
|
||||||
inner.new(rtxn, index, context.db_fields_ids_map)?,
|
inner.merged(rtxn, index, context.db_fields_ids_map)?,
|
||||||
new_fields_ids_map.deref_mut(),
|
new_fields_ids_map.deref_mut(),
|
||||||
&mut |fid, value| {
|
&mut |fid, value| {
|
||||||
Self::facet_fn_with_options(
|
Self::facet_fn_with_options(
|
||||||
@ -136,7 +136,7 @@ impl FacetedDocidsExtractor {
|
|||||||
}
|
}
|
||||||
DocumentChange::Insertion(inner) => extract_document_facets(
|
DocumentChange::Insertion(inner) => extract_document_facets(
|
||||||
attributes_to_extract,
|
attributes_to_extract,
|
||||||
inner.new(),
|
inner.inserted(),
|
||||||
new_fields_ids_map.deref_mut(),
|
new_fields_ids_map.deref_mut(),
|
||||||
&mut |fid, value| {
|
&mut |fid, value| {
|
||||||
Self::facet_fn_with_options(
|
Self::facet_fn_with_options(
|
||||||
|
@ -481,7 +481,7 @@ impl WordDocidsExtractors {
|
|||||||
.map_err(crate::Error::from)
|
.map_err(crate::Error::from)
|
||||||
};
|
};
|
||||||
document_tokenizer.tokenize_document(
|
document_tokenizer.tokenize_document(
|
||||||
inner.new(rtxn, index, context.db_fields_ids_map)?,
|
inner.merged(rtxn, index, context.db_fields_ids_map)?,
|
||||||
new_fields_ids_map,
|
new_fields_ids_map,
|
||||||
&mut token_fn,
|
&mut token_fn,
|
||||||
)?;
|
)?;
|
||||||
@ -500,7 +500,7 @@ impl WordDocidsExtractors {
|
|||||||
.map_err(crate::Error::from)
|
.map_err(crate::Error::from)
|
||||||
};
|
};
|
||||||
document_tokenizer.tokenize_document(
|
document_tokenizer.tokenize_document(
|
||||||
inner.new(),
|
inner.inserted(),
|
||||||
new_fields_ids_map,
|
new_fields_ids_map,
|
||||||
&mut token_fn,
|
&mut token_fn,
|
||||||
)?;
|
)?;
|
||||||
|
@ -80,7 +80,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
del_word_pair_proximity.push(((w1, w2), prox));
|
del_word_pair_proximity.push(((w1, w2), prox));
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
let document = inner.new(rtxn, index, context.db_fields_ids_map)?;
|
let document = inner.merged(rtxn, index, context.db_fields_ids_map)?;
|
||||||
process_document_tokens(
|
process_document_tokens(
|
||||||
document,
|
document,
|
||||||
document_tokenizer,
|
document_tokenizer,
|
||||||
@ -92,7 +92,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
DocumentChange::Insertion(inner) => {
|
DocumentChange::Insertion(inner) => {
|
||||||
let document = inner.new();
|
let document = inner.inserted();
|
||||||
process_document_tokens(
|
process_document_tokens(
|
||||||
document,
|
document,
|
||||||
document_tokenizer,
|
document_tokenizer,
|
||||||
|
@ -100,7 +100,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
|
|||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)?;
|
)?;
|
||||||
let old_rendered = prompt.render_document(
|
let old_rendered = prompt.render_document(
|
||||||
update.new(
|
update.merged(
|
||||||
&context.txn,
|
&context.txn,
|
||||||
context.index,
|
context.index,
|
||||||
context.db_fields_ids_map,
|
context.db_fields_ids_map,
|
||||||
@ -123,7 +123,11 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
|
|||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)?;
|
)?;
|
||||||
let new_rendered = prompt.render_document(
|
let new_rendered = prompt.render_document(
|
||||||
update.new(&context.txn, context.index, context.db_fields_ids_map)?,
|
update.merged(
|
||||||
|
&context.txn,
|
||||||
|
context.index,
|
||||||
|
context.db_fields_ids_map,
|
||||||
|
)?,
|
||||||
context.new_fields_ids_map,
|
context.new_fields_ids_map,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)?;
|
)?;
|
||||||
@ -156,7 +160,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
} else if new_vectors.regenerate {
|
} else if new_vectors.regenerate {
|
||||||
let rendered = prompt.render_document(
|
let rendered = prompt.render_document(
|
||||||
insertion.new(),
|
insertion.inserted(),
|
||||||
context.new_fields_ids_map,
|
context.new_fields_ids_map,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)?;
|
)?;
|
||||||
@ -164,7 +168,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let rendered = prompt.render_document(
|
let rendered = prompt.render_document(
|
||||||
insertion.new(),
|
insertion.inserted(),
|
||||||
context.new_fields_ids_map,
|
context.new_fields_ids_map,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)?;
|
)?;
|
||||||
|
@ -64,9 +64,7 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentExtractor<'a> {
|
|||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut document_buffer = Vec::new();
|
let mut document_buffer = Vec::new();
|
||||||
|
|
||||||
let new_fields_ids_map = context.new_fields_ids_map.borrow_or_yield();
|
let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield();
|
||||||
let new_fields_ids_map = &*new_fields_ids_map;
|
|
||||||
let new_fields_ids_map = new_fields_ids_map.local_map();
|
|
||||||
|
|
||||||
for change in changes {
|
for change in changes {
|
||||||
let change = change?;
|
let change = change?;
|
||||||
@ -78,20 +76,34 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentExtractor<'a> {
|
|||||||
let docid = deletion.docid();
|
let docid = deletion.docid();
|
||||||
self.document_sender.delete(docid, external_docid).unwrap();
|
self.document_sender.delete(docid, external_docid).unwrap();
|
||||||
}
|
}
|
||||||
/// TODO: change NONE by SOME(vector) when implemented
|
|
||||||
DocumentChange::Update(update) => {
|
DocumentChange::Update(update) => {
|
||||||
let docid = update.docid();
|
let docid = update.docid();
|
||||||
let content =
|
let content =
|
||||||
update.new(&context.txn, context.index, &context.db_fields_ids_map)?;
|
update.merged(&context.txn, context.index, &context.db_fields_ids_map)?;
|
||||||
let content =
|
let vector_content = update.merged_vectors(
|
||||||
write_to_obkv(&content, None, new_fields_ids_map, &mut document_buffer)?;
|
&context.txn,
|
||||||
|
context.index,
|
||||||
|
&context.db_fields_ids_map,
|
||||||
|
&context.doc_alloc,
|
||||||
|
)?;
|
||||||
|
let content = write_to_obkv(
|
||||||
|
&content,
|
||||||
|
vector_content.as_ref(),
|
||||||
|
&mut new_fields_ids_map,
|
||||||
|
&mut document_buffer,
|
||||||
|
)?;
|
||||||
self.document_sender.insert(docid, external_docid, content.boxed()).unwrap();
|
self.document_sender.insert(docid, external_docid, content.boxed()).unwrap();
|
||||||
}
|
}
|
||||||
DocumentChange::Insertion(insertion) => {
|
DocumentChange::Insertion(insertion) => {
|
||||||
let docid = insertion.docid();
|
let docid = insertion.docid();
|
||||||
let content = insertion.new();
|
let content = insertion.inserted();
|
||||||
let content =
|
let inserted_vectors = insertion.inserted_vectors(&context.doc_alloc)?;
|
||||||
write_to_obkv(&content, None, new_fields_ids_map, &mut document_buffer)?;
|
let content = write_to_obkv(
|
||||||
|
&content,
|
||||||
|
inserted_vectors.as_ref(),
|
||||||
|
&mut new_fields_ids_map,
|
||||||
|
&mut document_buffer,
|
||||||
|
)?;
|
||||||
self.document_sender.insert(docid, external_docid, content.boxed()).unwrap();
|
self.document_sender.insert(docid, external_docid, content.boxed()).unwrap();
|
||||||
// extracted_dictionary_sender.send(self, dictionary: &[u8]);
|
// extracted_dictionary_sender.send(self, dictionary: &[u8]);
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
use std::collections::BTreeSet;
|
||||||
|
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use raw_collections::RawMap;
|
use raw_collections::RawMap;
|
||||||
@ -106,14 +108,9 @@ impl<'t> VectorDocument<'t> for VectorDocumentFromDb<'t> {
|
|||||||
let config_name = self.doc_alloc.alloc_str(config.name.as_str());
|
let config_name = self.doc_alloc.alloc_str(config.name.as_str());
|
||||||
Ok((&*config_name, entry))
|
Ok((&*config_name, entry))
|
||||||
})
|
})
|
||||||
.chain(self.vectors_field.iter().map(|map| map.iter()).flatten().map(
|
.chain(self.vectors_field.iter().flat_map(|map| map.iter()).map(|(name, value)| {
|
||||||
|(name, value)| {
|
Ok((name, entry_from_raw_value(value).map_err(InternalError::SerdeJson)?))
|
||||||
Ok((
|
}))
|
||||||
name.as_ref(),
|
|
||||||
entry_from_raw_value(value).map_err(InternalError::SerdeJson)?,
|
|
||||||
))
|
|
||||||
},
|
|
||||||
))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn vectors_for_key(&self, key: &str) -> Result<Option<VectorEntry<'t>>> {
|
fn vectors_for_key(&self, key: &str) -> Result<Option<VectorEntry<'t>>> {
|
||||||
@ -139,7 +136,7 @@ fn entry_from_raw_value(
|
|||||||
let value: RawVectors = serde_json::from_str(value.get())?;
|
let value: RawVectors = serde_json::from_str(value.get())?;
|
||||||
Ok(VectorEntry {
|
Ok(VectorEntry {
|
||||||
has_configured_embedder: false,
|
has_configured_embedder: false,
|
||||||
embeddings: value.embeddings().map(|embeddings| Embeddings::FromJson(embeddings)),
|
embeddings: value.embeddings().map(Embeddings::FromJson),
|
||||||
regenerate: value.must_regenerate(),
|
regenerate: value.must_regenerate(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -175,3 +172,69 @@ impl<'doc> VectorDocument<'doc> for VectorDocumentFromVersions<'doc> {
|
|||||||
Ok(Some(vectors))
|
Ok(Some(vectors))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct MergedVectorDocument<'doc> {
|
||||||
|
new_doc: Option<VectorDocumentFromVersions<'doc>>,
|
||||||
|
db: Option<VectorDocumentFromDb<'doc>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'doc> MergedVectorDocument<'doc> {
|
||||||
|
pub fn with_db<Mapper: FieldIdMapper>(
|
||||||
|
docid: DocumentId,
|
||||||
|
index: &'doc Index,
|
||||||
|
rtxn: &'doc RoTxn,
|
||||||
|
db_fields_ids_map: &'doc Mapper,
|
||||||
|
versions: &Versions<'doc>,
|
||||||
|
doc_alloc: &'doc Bump,
|
||||||
|
) -> Result<Option<Self>> {
|
||||||
|
let db = VectorDocumentFromDb::new(docid, index, rtxn, db_fields_ids_map, doc_alloc)?;
|
||||||
|
let new_doc = VectorDocumentFromVersions::new(versions, doc_alloc)?;
|
||||||
|
Ok(if db.is_none() && new_doc.is_none() { None } else { Some(Self { new_doc, db }) })
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn without_db(versions: &Versions<'doc>, doc_alloc: &'doc Bump) -> Result<Option<Self>> {
|
||||||
|
let Some(new_doc) = VectorDocumentFromVersions::new(versions, doc_alloc)? else {
|
||||||
|
return Ok(None);
|
||||||
|
};
|
||||||
|
Ok(Some(Self { new_doc: Some(new_doc), db: None }))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'doc> VectorDocument<'doc> for MergedVectorDocument<'doc> {
|
||||||
|
fn iter_vectors(&self) -> impl Iterator<Item = Result<(&'doc str, VectorEntry<'doc>)>> {
|
||||||
|
let mut new_doc_it = self.new_doc.iter().flat_map(|new_doc| new_doc.iter_vectors());
|
||||||
|
let mut db_it = self.db.iter().flat_map(|db| db.iter_vectors());
|
||||||
|
let mut seen_fields = BTreeSet::new();
|
||||||
|
|
||||||
|
std::iter::from_fn(move || {
|
||||||
|
if let Some(next) = new_doc_it.next() {
|
||||||
|
if let Ok((name, _)) = next {
|
||||||
|
seen_fields.insert(name);
|
||||||
|
}
|
||||||
|
return Some(next);
|
||||||
|
}
|
||||||
|
loop {
|
||||||
|
match db_it.next()? {
|
||||||
|
Ok((name, value)) => {
|
||||||
|
if seen_fields.contains(name) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
return Some(Ok((name, value)));
|
||||||
|
}
|
||||||
|
Err(err) => return Some(Err(err)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn vectors_for_key(&self, key: &str) -> Result<Option<VectorEntry<'doc>>> {
|
||||||
|
if let Some(new_doc) = &self.new_doc {
|
||||||
|
if let Some(entry) = new_doc.vectors_for_key(key)? {
|
||||||
|
return Ok(Some(entry));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(db) = self.db.as_ref() else { return Ok(None) };
|
||||||
|
db.vectors_for_key(key)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user