2024-10-28 14:23:20 +01:00
|
|
|
use bumpalo::Bump;
|
2024-08-28 18:45:16 +02:00
|
|
|
use heed::RoTxn;
|
|
|
|
|
2024-10-28 14:23:20 +01:00
|
|
|
use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument, Versions};
|
2024-10-28 16:18:48 +01:00
|
|
|
use super::vector_document::{
|
|
|
|
MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions,
|
|
|
|
};
|
2024-10-03 18:08:09 +02:00
|
|
|
use crate::documents::FieldIdMapper;
|
2024-10-30 13:50:51 +01:00
|
|
|
use crate::vector::EmbeddingConfigs;
|
2024-10-03 18:08:09 +02:00
|
|
|
use crate::{DocumentId, Index, Result};
|
2024-08-28 18:45:16 +02:00
|
|
|
|
2024-10-03 18:08:09 +02:00
|
|
|
pub enum DocumentChange<'doc> {
|
2024-10-14 15:40:42 +02:00
|
|
|
Deletion(Deletion<'doc>),
|
2024-10-03 18:08:09 +02:00
|
|
|
Update(Update<'doc>),
|
|
|
|
Insertion(Insertion<'doc>),
|
2024-08-28 18:45:16 +02:00
|
|
|
}
|
|
|
|
|
2024-10-14 15:40:42 +02:00
|
|
|
pub struct Deletion<'doc> {
|
2024-10-03 18:08:09 +02:00
|
|
|
docid: DocumentId,
|
2024-10-14 15:40:42 +02:00
|
|
|
external_document_id: &'doc str,
|
2024-08-28 18:45:16 +02:00
|
|
|
}
|
|
|
|
|
2024-10-03 18:08:09 +02:00
|
|
|
pub struct Update<'doc> {
|
|
|
|
docid: DocumentId,
|
2024-10-14 15:40:42 +02:00
|
|
|
external_document_id: &'doc str,
|
2024-10-28 14:23:20 +01:00
|
|
|
new: Versions<'doc>,
|
2024-10-03 18:08:09 +02:00
|
|
|
has_deletion: bool,
|
2024-08-28 18:45:16 +02:00
|
|
|
}
|
|
|
|
|
2024-10-03 18:08:09 +02:00
|
|
|
pub struct Insertion<'doc> {
|
|
|
|
docid: DocumentId,
|
2024-10-14 15:40:42 +02:00
|
|
|
external_document_id: &'doc str,
|
2024-10-28 14:23:20 +01:00
|
|
|
new: Versions<'doc>,
|
2024-08-28 18:45:16 +02:00
|
|
|
}
|
|
|
|
|
2024-10-03 18:08:09 +02:00
|
|
|
impl<'doc> DocumentChange<'doc> {
|
2024-09-03 11:02:39 +02:00
|
|
|
pub fn docid(&self) -> DocumentId {
|
2024-08-28 18:45:16 +02:00
|
|
|
match &self {
|
|
|
|
Self::Deletion(inner) => inner.docid(),
|
|
|
|
Self::Update(inner) => inner.docid(),
|
|
|
|
Self::Insertion(inner) => inner.docid(),
|
|
|
|
}
|
|
|
|
}
|
2024-10-03 18:08:09 +02:00
|
|
|
|
2024-10-14 15:40:42 +02:00
|
|
|
pub fn external_docid(&self) -> &'doc str {
|
2024-10-03 18:08:09 +02:00
|
|
|
match self {
|
|
|
|
DocumentChange::Deletion(deletion) => deletion.external_document_id(),
|
|
|
|
DocumentChange::Update(update) => update.external_document_id(),
|
|
|
|
DocumentChange::Insertion(insertion) => insertion.external_document_id(),
|
|
|
|
}
|
|
|
|
}
|
2024-08-28 18:45:16 +02:00
|
|
|
}
|
|
|
|
|
2024-10-14 15:40:42 +02:00
|
|
|
impl<'doc> Deletion<'doc> {
|
|
|
|
pub fn create(docid: DocumentId, external_document_id: &'doc str) -> Self {
|
2024-10-03 18:08:09 +02:00
|
|
|
Self { docid, external_document_id }
|
2024-08-28 18:45:16 +02:00
|
|
|
}
|
|
|
|
|
2024-09-03 11:02:39 +02:00
|
|
|
pub fn docid(&self) -> DocumentId {
|
2024-08-28 18:45:16 +02:00
|
|
|
self.docid
|
|
|
|
}
|
|
|
|
|
2024-10-14 15:40:42 +02:00
|
|
|
pub fn external_document_id(&self) -> &'doc str {
|
|
|
|
self.external_document_id
|
2024-10-02 11:32:19 +02:00
|
|
|
}
|
|
|
|
|
2024-10-03 18:08:09 +02:00
|
|
|
pub fn current<'a, Mapper: FieldIdMapper>(
|
2024-09-03 15:14:16 +02:00
|
|
|
&self,
|
|
|
|
rtxn: &'a RoTxn,
|
|
|
|
index: &'a Index,
|
2024-10-03 18:08:09 +02:00
|
|
|
mapper: &'a Mapper,
|
|
|
|
) -> Result<DocumentFromDb<'a, Mapper>> {
|
|
|
|
Ok(DocumentFromDb::new(self.docid, rtxn, index, mapper)?.ok_or(
|
|
|
|
crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
|
|
|
|
)?)
|
2024-08-28 18:45:16 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-10-03 18:08:09 +02:00
|
|
|
impl<'doc> Insertion<'doc> {
|
2024-10-28 14:23:20 +01:00
|
|
|
pub fn create(docid: DocumentId, external_document_id: &'doc str, new: Versions<'doc>) -> Self {
|
2024-10-02 11:32:19 +02:00
|
|
|
Insertion { docid, external_document_id, new }
|
2024-08-29 12:06:44 +02:00
|
|
|
}
|
|
|
|
|
2024-09-03 11:02:39 +02:00
|
|
|
pub fn docid(&self) -> DocumentId {
|
2024-08-28 18:45:16 +02:00
|
|
|
self.docid
|
|
|
|
}
|
|
|
|
|
2024-10-14 15:40:42 +02:00
|
|
|
pub fn external_document_id(&self) -> &'doc str {
|
|
|
|
self.external_document_id
|
2024-10-02 11:32:19 +02:00
|
|
|
}
|
2024-10-28 16:18:48 +01:00
|
|
|
pub fn inserted(&self) -> DocumentFromVersions<'_, 'doc> {
|
2024-10-28 14:23:20 +01:00
|
|
|
DocumentFromVersions::new(&self.new)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn inserted_vectors(
|
|
|
|
&self,
|
|
|
|
doc_alloc: &'doc Bump,
|
2024-10-30 13:50:51 +01:00
|
|
|
embedders: &'doc EmbeddingConfigs,
|
2024-10-28 14:23:20 +01:00
|
|
|
) -> Result<Option<VectorDocumentFromVersions<'doc>>> {
|
2024-11-12 22:49:22 +01:00
|
|
|
VectorDocumentFromVersions::new(self.external_document_id, &self.new, doc_alloc, embedders)
|
2024-08-28 18:45:16 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-10-03 18:08:09 +02:00
|
|
|
impl<'doc> Update<'doc> {
|
2024-08-29 12:06:44 +02:00
|
|
|
pub fn create(
|
|
|
|
docid: DocumentId,
|
2024-10-14 15:40:42 +02:00
|
|
|
external_document_id: &'doc str,
|
2024-10-28 14:23:20 +01:00
|
|
|
new: Versions<'doc>,
|
2024-10-03 18:08:09 +02:00
|
|
|
has_deletion: bool,
|
2024-08-29 12:06:44 +02:00
|
|
|
) -> Self {
|
2024-10-03 18:08:09 +02:00
|
|
|
Update { docid, new, external_document_id, has_deletion }
|
2024-08-29 12:06:44 +02:00
|
|
|
}
|
|
|
|
|
2024-09-03 11:02:39 +02:00
|
|
|
pub fn docid(&self) -> DocumentId {
|
2024-08-28 18:45:16 +02:00
|
|
|
self.docid
|
|
|
|
}
|
|
|
|
|
2024-10-14 15:40:42 +02:00
|
|
|
pub fn external_document_id(&self) -> &'doc str {
|
|
|
|
self.external_document_id
|
2024-10-02 11:32:19 +02:00
|
|
|
}
|
2024-10-03 18:08:09 +02:00
|
|
|
pub fn current<'a, Mapper: FieldIdMapper>(
|
2024-09-03 15:14:16 +02:00
|
|
|
&self,
|
|
|
|
rtxn: &'a RoTxn,
|
|
|
|
index: &'a Index,
|
2024-10-03 18:08:09 +02:00
|
|
|
mapper: &'a Mapper,
|
|
|
|
) -> Result<DocumentFromDb<'a, Mapper>> {
|
|
|
|
Ok(DocumentFromDb::new(self.docid, rtxn, index, mapper)?.ok_or(
|
|
|
|
crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
|
|
|
|
)?)
|
2024-08-28 18:45:16 +02:00
|
|
|
}
|
|
|
|
|
2024-10-28 14:23:20 +01:00
|
|
|
pub fn current_vectors<'a, Mapper: FieldIdMapper>(
|
2024-10-03 18:08:09 +02:00
|
|
|
&self,
|
|
|
|
rtxn: &'a RoTxn,
|
|
|
|
index: &'a Index,
|
|
|
|
mapper: &'a Mapper,
|
2024-10-28 14:23:20 +01:00
|
|
|
doc_alloc: &'a Bump,
|
|
|
|
) -> Result<VectorDocumentFromDb<'a>> {
|
|
|
|
Ok(VectorDocumentFromDb::new(self.docid, index, rtxn, mapper, doc_alloc)?.ok_or(
|
|
|
|
crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
|
|
|
|
)?)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn updated(&self) -> DocumentFromVersions<'_, 'doc> {
|
|
|
|
DocumentFromVersions::new(&self.new)
|
|
|
|
}
|
|
|
|
|
2024-10-28 16:18:48 +01:00
|
|
|
pub fn merged<'t, Mapper: FieldIdMapper>(
|
2024-10-28 14:23:20 +01:00
|
|
|
&self,
|
|
|
|
rtxn: &'t RoTxn,
|
|
|
|
index: &'t Index,
|
|
|
|
mapper: &'t Mapper,
|
|
|
|
) -> Result<MergedDocument<'_, 'doc, 't, Mapper>> {
|
2024-10-03 18:08:09 +02:00
|
|
|
if self.has_deletion {
|
2024-10-28 14:23:20 +01:00
|
|
|
Ok(MergedDocument::without_db(DocumentFromVersions::new(&self.new)))
|
2024-10-03 18:08:09 +02:00
|
|
|
} else {
|
2024-10-28 14:23:20 +01:00
|
|
|
MergedDocument::with_db(
|
|
|
|
self.docid,
|
|
|
|
rtxn,
|
|
|
|
index,
|
|
|
|
mapper,
|
|
|
|
DocumentFromVersions::new(&self.new),
|
|
|
|
)
|
2024-10-03 18:08:09 +02:00
|
|
|
}
|
|
|
|
}
|
2024-10-28 14:23:20 +01:00
|
|
|
|
|
|
|
pub fn updated_vectors(
|
|
|
|
&self,
|
|
|
|
doc_alloc: &'doc Bump,
|
2024-10-30 13:50:51 +01:00
|
|
|
embedders: &'doc EmbeddingConfigs,
|
2024-10-28 14:23:20 +01:00
|
|
|
) -> Result<Option<VectorDocumentFromVersions<'doc>>> {
|
2024-11-12 22:49:22 +01:00
|
|
|
VectorDocumentFromVersions::new(self.external_document_id, &self.new, doc_alloc, embedders)
|
2024-10-28 14:23:20 +01:00
|
|
|
}
|
2024-10-28 16:18:48 +01:00
|
|
|
|
|
|
|
pub fn merged_vectors<Mapper: FieldIdMapper>(
|
|
|
|
&self,
|
|
|
|
rtxn: &'doc RoTxn,
|
|
|
|
index: &'doc Index,
|
|
|
|
mapper: &'doc Mapper,
|
|
|
|
doc_alloc: &'doc Bump,
|
2024-10-30 13:50:51 +01:00
|
|
|
embedders: &'doc EmbeddingConfigs,
|
2024-10-28 16:18:48 +01:00
|
|
|
) -> Result<Option<MergedVectorDocument<'doc>>> {
|
|
|
|
if self.has_deletion {
|
2024-11-12 22:49:22 +01:00
|
|
|
MergedVectorDocument::without_db(
|
|
|
|
self.external_document_id,
|
|
|
|
&self.new,
|
|
|
|
doc_alloc,
|
|
|
|
embedders,
|
|
|
|
)
|
2024-10-28 16:18:48 +01:00
|
|
|
} else {
|
2024-10-30 13:50:51 +01:00
|
|
|
MergedVectorDocument::with_db(
|
2024-11-12 22:49:22 +01:00
|
|
|
self.docid,
|
|
|
|
self.external_document_id,
|
|
|
|
index,
|
|
|
|
rtxn,
|
|
|
|
mapper,
|
|
|
|
&self.new,
|
|
|
|
doc_alloc,
|
|
|
|
embedders,
|
2024-10-30 13:50:51 +01:00
|
|
|
)
|
2024-10-28 16:18:48 +01:00
|
|
|
}
|
|
|
|
}
|
2024-10-03 18:08:09 +02:00
|
|
|
}
|