mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-08 20:44:30 +01:00
Document trait changes
This commit is contained in:
parent
bbb67ae0a8
commit
65470e26e0
@ -20,6 +20,14 @@ pub trait Document<'doc> {
|
||||
/// - The `_vectors` and `_geo` fields are **ignored** by this method, meaning they are **not returned** by this method.
|
||||
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'doc str, &'doc RawValue)>>;
|
||||
|
||||
/// Returns the number of entries this document reports.
///
/// NOTE(review): the implementations visible in this diff count different things
/// (a raw map length, a raw entry count, or the items of `iter_top_level_fields`),
/// so whether `_vectors` and `_geo` are included is not uniform — confirm the intended contract.
fn len(&self) -> usize;
|
||||
|
||||
/// Returns `true` when `len()` is zero.
///
/// Provided as a default method, so implementors only need to supply `len`.
fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
/// Looks up the top-level field named `k` and returns its unparsed value, if any.
///
/// NOTE(review): unlike `iter_top_level_fields`, nothing in the implementations visible here
/// excludes `_vectors` or `_geo` when they are passed as `k` — confirm whether that is intended.
fn top_level_field(&self, k: &str) -> Result<Option<&'doc RawValue>>;
|
||||
|
||||
/// Returns the unparsed value of the `_vectors` field from the document data.
|
||||
///
|
||||
/// This field alone is insufficient to retrieve vectors, as they may be stored in a dedicated location in the database.
|
||||
@ -37,6 +45,7 @@ pub trait Document<'doc> {
|
||||
fn geo_field(&self) -> Result<Option<&'doc RawValue>>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct DocumentFromDb<'t, Mapper: FieldIdMapper>
|
||||
where
|
||||
Mapper: FieldIdMapper,
|
||||
@ -84,6 +93,14 @@ impl<'t, Mapper: FieldIdMapper> Document<'t> for DocumentFromDb<'t, Mapper> {
|
||||
/// Fetches the `_geo` entry by delegating to `Self::field`.
fn geo_field(&self) -> Result<Option<&'t RawValue>> {
|
||||
self.field("_geo")
|
||||
}
|
||||
|
||||
/// Counts the entries yielded by `self.content.iter()`.
///
/// The count is obtained by walking the iterator on every call.
/// NOTE(review): presumably this includes `_vectors` / `_geo` entries when they are stored in
/// `content`, whereas `iter_top_level_fields` is documented to skip them — confirm.
fn len(&self) -> usize {
|
||||
self.content.iter().count()
|
||||
}
|
||||
|
||||
/// Looks up the field named `k` by delegating to `Self::field`.
fn top_level_field(&self, k: &str) -> Result<Option<&'t RawValue>> {
|
||||
self.field(k)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t, Mapper: FieldIdMapper> DocumentFromDb<'t, Mapper> {
|
||||
@ -107,18 +124,18 @@ impl<'t, Mapper: FieldIdMapper> DocumentFromDb<'t, Mapper> {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct DocumentFromVersions<'doc> {
|
||||
versions: Versions<'doc>,
|
||||
/// A document view that borrows a `Versions` value instead of owning it.
///
/// `'a` is the lifetime of the borrow of the `Versions`, `'doc` the lifetime carried by the
/// `Versions` itself.
#[derive(Debug)]
|
||||
pub struct DocumentFromVersions<'a, 'doc> {
|
||||
// Borrowed versions that every accessor of this type delegates to.
versions: &'a Versions<'doc>,
|
||||
}
|
||||
|
||||
impl<'doc> DocumentFromVersions<'doc> {
|
||||
pub fn new(versions: Versions<'doc>) -> Self {
|
||||
impl<'a, 'doc> DocumentFromVersions<'a, 'doc> {
|
||||
/// Wraps a borrowed `Versions` in a `DocumentFromVersions`; the reference is stored as-is.
pub fn new(versions: &'a Versions<'doc>) -> Self {
|
||||
Self { versions }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'doc> Document<'doc> for DocumentFromVersions<'doc> {
|
||||
impl<'a, 'doc> Document<'doc> for DocumentFromVersions<'a, 'doc> {
|
||||
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'doc str, &'doc RawValue)>> {
|
||||
self.versions.iter_top_level_fields().map(Ok)
|
||||
}
|
||||
@ -130,16 +147,25 @@ impl<'doc> Document<'doc> for DocumentFromVersions<'doc> {
|
||||
/// Returns the result of `Versions::geo_field`, wrapped in `Ok`.
///
/// This implementation has no error path of its own.
fn geo_field(&self) -> Result<Option<&'doc RawValue>> {
|
||||
Ok(self.versions.geo_field())
|
||||
}
|
||||
|
||||
/// Returns the length reported by the wrapped `Versions`.
fn len(&self) -> usize {
|
||||
self.versions.len()
|
||||
}
|
||||
|
||||
/// Returns the result of `Versions::top_level_field` for `k`, wrapped in `Ok`.
///
/// This implementation has no error path of its own.
fn top_level_field(&self, k: &str) -> Result<Option<&'doc RawValue>> {
|
||||
Ok(self.versions.top_level_field(k))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MergedDocument<'doc, 't, Mapper: FieldIdMapper> {
|
||||
new_doc: DocumentFromVersions<'doc>,
|
||||
/// A document made of a new version plus, optionally, the version read from the database.
#[derive(Debug)]
|
||||
pub struct MergedDocument<'a, 'doc, 't, Mapper: FieldIdMapper> {
|
||||
// The incoming version of the document.
new_doc: DocumentFromVersions<'a, 'doc>,
|
||||
// The database version, or `None` when no database document is attached.
db: Option<DocumentFromDb<'t, Mapper>>,
|
||||
}
|
||||
|
||||
impl<'doc, 't, Mapper: FieldIdMapper> MergedDocument<'doc, 't, Mapper> {
|
||||
impl<'a, 'doc, 't, Mapper: FieldIdMapper> MergedDocument<'a, 'doc, 't, Mapper> {
|
||||
pub fn new(
|
||||
new_doc: DocumentFromVersions<'doc>,
|
||||
new_doc: DocumentFromVersions<'a, 'doc>,
|
||||
db: Option<DocumentFromDb<'t, Mapper>>,
|
||||
) -> Self {
|
||||
Self { new_doc, db }
|
||||
@ -150,19 +176,19 @@ impl<'doc, 't, Mapper: FieldIdMapper> MergedDocument<'doc, 't, Mapper> {
|
||||
rtxn: &'t RoTxn,
|
||||
index: &'t Index,
|
||||
db_fields_ids_map: &'t Mapper,
|
||||
new_doc: DocumentFromVersions<'doc>,
|
||||
new_doc: DocumentFromVersions<'a, 'doc>,
|
||||
) -> Result<Self> {
|
||||
let db = DocumentFromDb::new(docid, rtxn, index, db_fields_ids_map)?;
|
||||
Ok(Self { new_doc, db })
|
||||
}
|
||||
|
||||
pub fn without_db(new_doc: DocumentFromVersions<'doc>) -> Self {
|
||||
pub fn without_db(new_doc: DocumentFromVersions<'a, 'doc>) -> Self {
|
||||
Self { new_doc, db: None }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'d, 'doc: 'd, 't: 'd, Mapper: FieldIdMapper> Document<'d>
|
||||
for MergedDocument<'doc, 't, Mapper>
|
||||
for MergedDocument<'d, 'doc, 't, Mapper>
|
||||
{
|
||||
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'d str, &'d RawValue)>> {
|
||||
let mut new_doc_it = self.new_doc.iter_top_level_fields();
|
||||
@ -209,6 +235,20 @@ impl<'d, 'doc: 'd, 't: 'd, Mapper: FieldIdMapper> Document<'d>
|
||||
|
||||
db.geo_field()
|
||||
}
|
||||
|
||||
/// Counts the items produced by `iter_top_level_fields`.
///
/// Every yielded item is counted, whether it is `Ok` or `Err`, and the iterator is walked
/// in full on each call.
fn len(&self) -> usize {
|
||||
self.iter_top_level_fields().count()
|
||||
}
|
||||
|
||||
/// Looks up the field named `k`, preferring the new version over the database version.
///
/// Returns the value from `new_doc` when it has one; otherwise asks the database document
/// (when present); otherwise `Ok(None)`. Errors from either lookup are propagated.
fn top_level_field(&self, k: &str) -> Result<Option<&'d RawValue>> {
|
||||
// The new version takes precedence: return as soon as it provides the field.
if let Some(f) = self.new_doc.top_level_field(k)? {
|
||||
return Ok(Some(f));
|
||||
}
|
||||
// Fall back to the database version, if there is one.
if let Some(db) = self.db {
|
||||
return db.field(k);
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'doc, D> Document<'doc> for &D
|
||||
@ -226,6 +266,14 @@ where
|
||||
/// Forwards to `D::geo_field` on the referenced document.
fn geo_field(&self) -> Result<Option<&'doc RawValue>> {
|
||||
D::geo_field(self)
|
||||
}
|
||||
|
||||
/// Forwards to `D::len` on the referenced document.
fn len(&self) -> usize {
|
||||
D::len(self)
|
||||
}
|
||||
|
||||
/// Forwards to `D::top_level_field` on the referenced document.
fn top_level_field(&self, k: &str) -> Result<Option<&'doc RawValue>> {
|
||||
D::top_level_field(self, k)
|
||||
}
|
||||
}
|
||||
|
||||
/// Turn this document into an obkv, whose fields are indexed by the provided `FieldIdMapper`.
|
||||
@ -301,11 +349,9 @@ where
|
||||
|
||||
pub type Entry<'doc> = (&'doc str, &'doc RawValue);
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
#[derive(Debug)]
|
||||
pub struct Versions<'doc> {
|
||||
data: &'doc [Entry<'doc>],
|
||||
vectors: Option<&'doc RawValue>,
|
||||
geo: Option<&'doc RawValue>,
|
||||
data: RawMap<'doc>,
|
||||
}
|
||||
|
||||
impl<'doc> Versions<'doc> {
|
||||
@ -324,26 +370,30 @@ impl<'doc> Versions<'doc> {
|
||||
}
|
||||
|
||||
pub fn single(version: RawMap<'doc>) -> Self {
|
||||
let vectors_id = version.get_index(RESERVED_VECTORS_FIELD_NAME);
|
||||
let geo_id = version.get_index("_geo");
|
||||
let mut data = version.into_vec();
|
||||
let geo = geo_id.map(|geo_id| data.remove(geo_id).1);
|
||||
let vectors = vectors_id.map(|vectors_id| data.remove(vectors_id).1);
|
||||
|
||||
let data = data.into_bump_slice();
|
||||
|
||||
Self { data, geo, vectors }
|
||||
Self { data: version }
|
||||
}
|
||||
|
||||
pub fn iter_top_level_fields(&self) -> impl Iterator<Item = Entry<'doc>> {
|
||||
self.data.iter().copied()
|
||||
/// Returns the iterator of the underlying map.
///
/// NOTE(review): no entry is filtered out here, so `_vectors` and `_geo` are yielded when they
/// are present in the map (see the FIXME in the body).
pub fn iter_top_level_fields(&self) -> raw_collections::map::iter::Iter<'doc, '_> {
|
||||
// FIXME: ignore vectors and _geo
|
||||
self.data.iter()
|
||||
}
|
||||
|
||||
pub fn vectors_field(&self) -> Option<&'doc RawValue> {
|
||||
self.vectors
|
||||
self.data.get(RESERVED_VECTORS_FIELD_NAME)
|
||||
}
|
||||
|
||||
pub fn geo_field(&self) -> Option<&'doc RawValue> {
|
||||
self.geo
|
||||
self.data.get("_geo")
|
||||
}
|
||||
|
||||
/// Returns the length of the underlying map (`self.data.len()`).
pub fn len(&self) -> usize {
|
||||
self.data.len()
|
||||
}
|
||||
|
||||
/// Returns whether the underlying map is empty (`self.data.is_empty()`).
pub fn is_empty(&self) -> bool {
|
||||
self.data.is_empty()
|
||||
}
|
||||
/// Looks up `k` in the underlying map and returns the associated raw value, if any.
pub fn top_level_field(&self, k: &str) -> Option<&'doc RawValue> {
|
||||
self.data.get(k)
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,8 @@
|
||||
use bumpalo::Bump;
|
||||
use heed::RoTxn;
|
||||
|
||||
use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument};
|
||||
use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument, Versions};
|
||||
use super::vector_document::{VectorDocumentFromDb, VectorDocumentFromVersions};
|
||||
use crate::documents::FieldIdMapper;
|
||||
use crate::{DocumentId, Index, Result};
|
||||
|
||||
@ -18,14 +20,14 @@ pub struct Deletion<'doc> {
|
||||
pub struct Update<'doc> {
|
||||
docid: DocumentId,
|
||||
external_document_id: &'doc str,
|
||||
new: DocumentFromVersions<'doc>,
|
||||
new: Versions<'doc>,
|
||||
has_deletion: bool,
|
||||
}
|
||||
|
||||
pub struct Insertion<'doc> {
|
||||
docid: DocumentId,
|
||||
external_document_id: &'doc str,
|
||||
new: DocumentFromVersions<'doc>,
|
||||
new: Versions<'doc>,
|
||||
}
|
||||
|
||||
impl<'doc> DocumentChange<'doc> {
|
||||
@ -72,11 +74,7 @@ impl<'doc> Deletion<'doc> {
|
||||
}
|
||||
|
||||
impl<'doc> Insertion<'doc> {
|
||||
pub fn create(
|
||||
docid: DocumentId,
|
||||
external_document_id: &'doc str,
|
||||
new: DocumentFromVersions<'doc>,
|
||||
) -> Self {
|
||||
pub fn create(docid: DocumentId, external_document_id: &'doc str, new: Versions<'doc>) -> Self {
|
||||
Insertion { docid, external_document_id, new }
|
||||
}
|
||||
|
||||
@ -87,8 +85,15 @@ impl<'doc> Insertion<'doc> {
|
||||
/// Returns the `external_document_id` stored in this value.
pub fn external_document_id(&self) -> &'doc str {
|
||||
self.external_document_id
|
||||
}
|
||||
pub fn new(&self) -> DocumentFromVersions<'doc> {
|
||||
self.new
|
||||
pub fn new(&self) -> DocumentFromVersions<'_, 'doc> {
|
||||
DocumentFromVersions::new(&self.new)
|
||||
}
|
||||
|
||||
/// Builds a `VectorDocumentFromVersions` from the versions held in `self.new`.
///
/// `doc_alloc` is passed through to `VectorDocumentFromVersions::new`, whose result is
/// returned unchanged.
pub fn inserted_vectors(
|
||||
&self,
|
||||
doc_alloc: &'doc Bump,
|
||||
) -> Result<Option<VectorDocumentFromVersions<'doc>>> {
|
||||
VectorDocumentFromVersions::new(&self.new, doc_alloc)
|
||||
}
|
||||
}
|
||||
|
||||
@ -96,7 +101,7 @@ impl<'doc> Update<'doc> {
|
||||
pub fn create(
|
||||
docid: DocumentId,
|
||||
external_document_id: &'doc str,
|
||||
new: DocumentFromVersions<'doc>,
|
||||
new: Versions<'doc>,
|
||||
has_deletion: bool,
|
||||
) -> Self {
|
||||
Update { docid, new, external_document_id, has_deletion }
|
||||
@ -120,20 +125,45 @@ impl<'doc> Update<'doc> {
|
||||
)?)
|
||||
}
|
||||
|
||||
pub fn updated(&self) -> DocumentFromVersions<'doc> {
|
||||
self.new
|
||||
}
|
||||
|
||||
pub fn new<'a, Mapper: FieldIdMapper>(
|
||||
pub fn current_vectors<'a, Mapper: FieldIdMapper>(
|
||||
&self,
|
||||
rtxn: &'a RoTxn,
|
||||
index: &'a Index,
|
||||
mapper: &'a Mapper,
|
||||
) -> Result<MergedDocument<'doc, 'a, Mapper>> {
|
||||
doc_alloc: &'a Bump,
|
||||
) -> Result<VectorDocumentFromDb<'a>> {
|
||||
Ok(VectorDocumentFromDb::new(self.docid, index, rtxn, mapper, doc_alloc)?.ok_or(
|
||||
crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
|
||||
)?)
|
||||
}
|
||||
|
||||
/// Returns a `DocumentFromVersions` view that borrows the versions held in `self.new`.
pub fn updated(&self) -> DocumentFromVersions<'_, 'doc> {
|
||||
DocumentFromVersions::new(&self.new)
|
||||
}
|
||||
|
||||
pub fn new<'t, Mapper: FieldIdMapper>(
|
||||
&self,
|
||||
rtxn: &'t RoTxn,
|
||||
index: &'t Index,
|
||||
mapper: &'t Mapper,
|
||||
) -> Result<MergedDocument<'_, 'doc, 't, Mapper>> {
|
||||
if self.has_deletion {
|
||||
Ok(MergedDocument::without_db(self.new))
|
||||
Ok(MergedDocument::without_db(DocumentFromVersions::new(&self.new)))
|
||||
} else {
|
||||
MergedDocument::with_db(self.docid, rtxn, index, mapper, self.new)
|
||||
MergedDocument::with_db(
|
||||
self.docid,
|
||||
rtxn,
|
||||
index,
|
||||
mapper,
|
||||
DocumentFromVersions::new(&self.new),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a `VectorDocumentFromVersions` from the versions held in `self.new`.
///
/// `doc_alloc` is passed through to `VectorDocumentFromVersions::new`, whose result is
/// returned unchanged.
pub fn updated_vectors(
|
||||
&self,
|
||||
doc_alloc: &'doc Bump,
|
||||
) -> Result<Option<VectorDocumentFromVersions<'doc>>> {
|
||||
VectorDocumentFromVersions::new(&self.new, doc_alloc)
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user