Move the document id extraction to the primary key code

This commit is contained in:
Clément Renault 2024-09-12 12:01:31 +02:00
parent aa69308e45
commit ff5d3b59f5
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
3 changed files with 65 additions and 53 deletions

View File

@ -1,8 +1,10 @@
use std::borrow::Cow;
use std::iter;
use std::result::Result as StdResult;
use serde_json::Value;
use serde_json::{from_str, Value};
use crate::update::new::{CowStr, TopLevelMap};
use crate::{FieldId, InternalError, Object, Result, UserError};
/// The symbol used to define levels in a nested primary key.
@ -100,6 +102,45 @@ impl<'a> PrimaryKey<'a> {
}
}
/// Returns the document ID based on the primary and
/// search for it recursively in zero-copy-deserialized documents.
pub fn document_id_from_top_level_map<'p>(
&self,
document: &TopLevelMap<'p>,
) -> Result<StdResult<CowStr<'p>, DocumentIdExtractionError>> {
fn get_docid<'p>(
document: &TopLevelMap<'p>,
primary_key: &[&str],
) -> Result<StdResult<CowStr<'p>, DocumentIdExtractionError>> {
match primary_key {
[] => unreachable!("arrrgh"), // would None be ok?
[primary_key] => match document.0.get(*primary_key) {
Some(value) => match from_str::<u64>(value.get()) {
Ok(value) => Ok(Ok(CowStr(Cow::Owned(value.to_string())))),
Err(_) => match from_str(value.get()) {
Ok(document_id) => Ok(Ok(document_id)),
Err(e) => Ok(Err(DocumentIdExtractionError::InvalidDocumentId(
UserError::SerdeJson(e),
))),
},
},
None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
},
[head, tail @ ..] => match document.0.get(*head) {
Some(value) => {
let document = from_str(value.get()).map_err(InternalError::SerdeJson)?;
get_docid(&document, tail)
}
None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
},
}
}
/// TODO do not allocate a vec everytime here
let primary_key: Vec<_> = self.name().split(PRIMARY_KEY_SPLIT_SYMBOL).collect();
get_docid(document, &primary_key)
}
/// Returns an `Iterator` that gives all the possible fields names the primary key
/// can have depending of the first level name and depth of the objects.
pub fn possible_level_names(&self) -> impl Iterator<Item = (&str, &str)> + '_ {

View File

@ -13,7 +13,7 @@ use super::super::document_change::DocumentChange;
use super::super::items_pool::ItemsPool;
use super::top_level_map::{CowStr, TopLevelMap};
use super::DocumentChanges;
use crate::documents::PrimaryKey;
use crate::documents::{DocumentIdExtractionError, PrimaryKey};
use crate::update::new::{Deletion, Insertion, KvReaderFieldId, KvWriterFieldId, Update};
use crate::update::{AvailableIds, IndexDocumentsMethod};
use crate::{DocumentId, Error, FieldsIdsMap, Index, Result, UserError};
@ -98,37 +98,22 @@ impl<'p, 'pl: 'p> DocumentChanges<'p> for DocumentOperation<'pl> {
// TODO we must manage the TooManyDocumentIds,InvalidDocumentId
// we must manage the unwrap
let external_document_id =
match get_docid(&document, &[primary_key.name()]).unwrap() {
Some(document_id) => document_id,
None => {
return Err(UserError::MissingDocumentId {
match primary_key.document_id_from_top_level_map(&document)? {
Ok(document_id) => Ok(document_id),
Err(DocumentIdExtractionError::InvalidDocumentId(e)) => Err(e),
Err(DocumentIdExtractionError::MissingDocumentId) => {
Err(UserError::MissingDocumentId {
primary_key: primary_key.name().to_string(),
document: todo!(),
// document: obkv_to_object(document, &batch_index)?,
document: document.try_into().unwrap(),
})
}
.into());
Err(DocumentIdExtractionError::TooManyDocumentIds(_)) => {
Err(UserError::TooManyDocumentIds {
primary_key: primary_key.name().to_string(),
document: document.try_into().unwrap(),
})
}
};
// let external_document_id =
// match primary_key.document_id(document, &batch_index)? {
// Ok(document_id) => Ok(document_id),
// Err(DocumentIdExtractionError::InvalidDocumentId(user_error)) => {
// Err(user_error)
// }
// Err(DocumentIdExtractionError::MissingDocumentId) => {
// Err(UserError::MissingDocumentId {
// primary_key: primary_key.name().to_string(),
// document: obkv_to_object(document, &batch_index)?,
// })
// }
// Err(DocumentIdExtractionError::TooManyDocumentIds(_)) => {
// Err(UserError::TooManyDocumentIds {
// primary_key: primary_key.name().to_string(),
// document: obkv_to_object(document, &batch_index)?,
// })
// }
// }?;
}?;
let current_offset = iter.byte_offset();
let document_operation = InnerDocOp::Addition(DocumentOffset {
@ -397,25 +382,3 @@ impl MergeChanges for MergeDocumentForUpdates {
}
}
}
/// Returns the document ID based on the primary and
/// search for it recursively in zero-copy-deserialized documents.
fn get_docid<'p>(
map: &TopLevelMap<'p>,
primary_key: &[&str],
) -> serde_json::Result<Option<CowStr<'p>>> {
match primary_key {
[] => unreachable!("arrrgh"), // would None be ok?
[primary_key] => match map.0.get(*primary_key) {
Some(value) => match from_str::<u64>(value.get()) {
Ok(value) => Ok(Some(CowStr(Cow::Owned(value.to_string())))),
Err(_) => Ok(Some(from_str(value.get())?)),
},
None => Ok(None),
},
[head, tail @ ..] => match map.0.get(*head) {
Some(value) => get_docid(&from_str(value.get())?, tail),
None => Ok(None),
},
}
}

View File

@ -22,6 +22,14 @@ impl TryFrom<&'_ TopLevelMap<'_>> for Map<String, Value> {
}
}
impl TryFrom<TopLevelMap<'_>> for Map<String, Value> {
type Error = serde_json::Error;
fn try_from(tlmap: TopLevelMap<'_>) -> Result<Self, Self::Error> {
TryFrom::try_from(&tlmap)
}
}
impl<'p> ops::Deref for TopLevelMap<'p> {
type Target = BTreeMap<CowStr<'p>, &'p RawValue>;