Fix a bug where generated docids were not saved when indexing JSON docs

This commit is contained in:
Clément Renault 2020-11-01 12:14:44 +01:00
parent f0e63025b0
commit 600aa223c2
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
2 changed files with 34 additions and 13 deletions

View File

@ -739,6 +739,19 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let count = index.number_of_documents(&rtxn).unwrap();
assert_eq!(count, 3);
let docs = index.documents(&rtxn, vec![0, 1, 2]).unwrap();
let (kevin_id, _) = docs.iter().find(|(_, d)| d.get(1).unwrap() == br#""kevin""#).unwrap();
let (id, doc) = docs[*kevin_id as usize];
assert_eq!(id, *kevin_id);
// Check that this document is equal to the last
// one sent and that a UUID has been generated.
let mut doc_iter = doc.iter();
// This is a UUID: it must be 36 bytes long, plus the 2 surrounding string quotes (").
doc_iter.next().filter(|(_, id)| id.len() == 36 + 2).unwrap();
assert_eq!(doc_iter.next(), Some((1, &br#""kevin""#[..])));
assert_eq!(doc_iter.next(), None);
drop(rtxn);
}

View File

@ -133,7 +133,7 @@ impl Transform<'_, '_> {
let mut uuid_buffer = [0; uuid::adapter::Hyphenated::LENGTH];
for result in documents {
let mut document = result?;
let document = result?;
obkv_buffer.clear();
let mut writer = obkv::KvWriter::new(&mut obkv_buffer);
@ -143,21 +143,11 @@ impl Transform<'_, '_> {
fields_ids_map.insert(&key).context("field id limit reached")?;
}
// We iterate in the fields ids ordered.
for (field_id, name) in fields_ids_map.iter() {
if let Some(value) = document.get(name) {
// We serialize the attribute values.
json_buffer.clear();
serde_json::to_writer(&mut json_buffer, value)?;
writer.insert(field_id, &json_buffer)?;
}
}
// We retrieve the user id from the document based on the primary key name,
// if the document id isn't present we generate a uuid.
let user_id = match document.remove(&primary_key_name) {
let user_id = match document.get(&primary_key_name) {
Some(value) => match value {
Value::String(string) => Cow::Owned(string),
Value::String(string) => Cow::Borrowed(string.as_str()),
Value::Number(number) => Cow::Owned(number.to_string()),
_ => return Err(anyhow!("documents ids must be either strings or numbers")),
},
@ -170,6 +160,24 @@ impl Transform<'_, '_> {
},
};
// We iterate over the fields ids in order.
for (field_id, name) in fields_ids_map.iter() {
json_buffer.clear();
// We try to extract the value from the document; if we don't find anything
// and this field is the document id, we write the one we generated.
if let Some(value) = document.get(name) {
// We serialize the attribute values.
serde_json::to_writer(&mut json_buffer, value)?;
writer.insert(field_id, &json_buffer)?;
}
else if field_id == primary_key {
// We serialize the document id.
serde_json::to_writer(&mut json_buffer, &user_id)?;
writer.insert(field_id, &json_buffer)?;
}
}
// We use the extracted/generated user id as the key for this document.
sorter.insert(user_id.as_bytes(), &obkv_buffer)?;
}