From 600aa223c2ee1ea925096ad3d9885424ade57d4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 1 Nov 2020 12:14:44 +0100 Subject: [PATCH] Fix a bug where generated docids were not saved when indexing JSON docs --- src/update/index_documents/mod.rs | 13 ++++++++++ src/update/index_documents/transform.rs | 34 +++++++++++++++---------- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/update/index_documents/mod.rs b/src/update/index_documents/mod.rs index e79190679..9b6083cec 100644 --- a/src/update/index_documents/mod.rs +++ b/src/update/index_documents/mod.rs @@ -739,6 +739,19 @@ mod tests { let rtxn = index.read_txn().unwrap(); let count = index.number_of_documents(&rtxn).unwrap(); assert_eq!(count, 3); + + let docs = index.documents(&rtxn, vec![0, 1, 2]).unwrap(); + let (kevin_id, _) = docs.iter().find(|(_, d)| d.get(1).unwrap() == br#""kevin""#).unwrap(); + let (id, doc) = docs[*kevin_id as usize]; + assert_eq!(id, *kevin_id); + + // Check that this document is equal to the last + // one sent and that a UUID has been generated. + let mut doc_iter = doc.iter(); + // This is a UUID: it must be 36 bytes long plus the 2 surrounding string quotes ("). 
+ doc_iter.next().filter(|(_, id)| id.len() == 36 + 2).unwrap(); + assert_eq!(doc_iter.next(), Some((1, &br#""kevin""#[..]))); + assert_eq!(doc_iter.next(), None); drop(rtxn); } diff --git a/src/update/index_documents/transform.rs b/src/update/index_documents/transform.rs index 3fd7170dd..df0dccc3f 100644 --- a/src/update/index_documents/transform.rs +++ b/src/update/index_documents/transform.rs @@ -133,7 +133,7 @@ impl Transform<'_, '_> { let mut uuid_buffer = [0; uuid::adapter::Hyphenated::LENGTH]; for result in documents { - let mut document = result?; + let document = result?; obkv_buffer.clear(); let mut writer = obkv::KvWriter::new(&mut obkv_buffer); @@ -143,21 +143,11 @@ impl Transform<'_, '_> { fields_ids_map.insert(&key).context("field id limit reached")?; } - // We iterate in the fields ids ordered. - for (field_id, name) in fields_ids_map.iter() { - if let Some(value) = document.get(name) { - // We serialize the attribute values. - json_buffer.clear(); - serde_json::to_writer(&mut json_buffer, value)?; - writer.insert(field_id, &json_buffer)?; - } - } - // We retrieve the user id from the document based on the primary key name, // if the document id isn't present we generate a uuid. - let user_id = match document.remove(&primary_key_name) { + let user_id = match document.get(&primary_key_name) { Some(value) => match value { - Value::String(string) => Cow::Owned(string), + Value::String(string) => Cow::Borrowed(string.as_str()), Value::Number(number) => Cow::Owned(number.to_string()), _ => return Err(anyhow!("documents ids must be either strings or numbers")), }, @@ -170,6 +160,24 @@ impl Transform<'_, '_> { }, }; + // We iterate over the field ids in order. + for (field_id, name) in fields_ids_map.iter() { + json_buffer.clear(); + + // We try to extract the value from the document; if there is none + // and this field is the document id, we write the id we generated. 
+ if let Some(value) = document.get(name) { + // We serialize the attribute values. + serde_json::to_writer(&mut json_buffer, value)?; + writer.insert(field_id, &json_buffer)?; + } + else if field_id == primary_key { + // We serialize the document id. + serde_json::to_writer(&mut json_buffer, &user_id)?; + writer.insert(field_id, &json_buffer)?; + } + } + // We use the extracted/generated user id as the key for this document. sorter.insert(user_id.as_bytes(), &obkv_buffer)?; }