fix document id uniqueness bug

This commit is contained in:
mpostma 2020-07-07 14:52:49 +02:00
parent a0637c2c6d
commit 754efe1f42
2 changed files with 21 additions and 9 deletions

View File

@ -171,15 +171,23 @@ pub fn apply_addition<'a, 'b>(
let mut new_internal_docids = Vec::with_capacity(new_documents.len());
for mut document in new_documents {
let external_docids_get = |docid: &str| {
match (external_docids.get(docid), new_external_docids.get(docid)) {
(_, Some(&id))
| (Some(id), _) => Some(id as u32),
(None, None) => None,
}
};
let (internal_docid, external_docid) =
extract_document_id(
&primary_key,
&document,
&external_docids,
&external_docids_get,
&mut available_ids,
)?;
new_external_docids.insert(external_docid, internal_docid.0);
new_external_docids.insert(external_docid, internal_docid.0 as u64);
new_internal_docids.push(internal_docid);
if partial {

View File

@ -98,15 +98,17 @@ pub fn value_to_number(value: &Value) -> Option<Number> {
/// Validates a string representation to be a correct document id and returns
/// the corresponding id or generate a new one, this is the way we produce documents ids.
pub fn discover_document_id(
pub fn discover_document_id<F>(
docid: &str,
external_docids: &FstMapCow,
external_docids_get: F,
available_docids: &mut DiscoverIds<'_>,
) -> Result<DocumentId, SerializerError>
where
F: FnOnce(&str) -> Option<u32>
{
if docid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
match external_docids.get(docid) {
Some(id) => Ok(DocumentId(id as u32)),
match external_docids_get(docid) {
Some(id) => Ok(DocumentId(id)),
None => {
let internal_id = available_docids.next().expect("no more ids available");
Ok(internal_id)
@ -118,12 +120,14 @@ pub fn discover_document_id(
}
/// Extracts and validates the document id of a document.
pub fn extract_document_id(
pub fn extract_document_id<F>(
primary_key: &str,
document: &IndexMap<String, Value>,
external_docids: &FstMapCow,
external_docids_get: F,
available_docids: &mut DiscoverIds<'_>,
) -> Result<(DocumentId, String), SerializerError>
where
F: FnOnce(&str) -> Option<u32>
{
match document.get(primary_key) {
Some(value) => {
@ -132,7 +136,7 @@ pub fn extract_document_id(
Value::String(string) => string.clone(),
_ => return Err(SerializerError::InvalidDocumentIdFormat),
};
discover_document_id(&docid, external_docids, available_docids).map(|id| (id, docid))
discover_document_id(&docid, external_docids_get, available_docids).map(|id| (id, docid))
}
None => Err(SerializerError::DocumentIdNotFound),
}