From 2ceeb51c37a98e8233edab175467e11101ef5dd2 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 15 Jun 2022 15:14:20 +0200 Subject: [PATCH] Support the auto-generated ids when validating documents --- milli/src/update/index_documents/mod.rs | 6 ++++-- milli/src/update/index_documents/validate.rs | 12 +++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index ae42483f0..1a1fc9a0e 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -778,7 +778,8 @@ mod tests { let indexing_config = IndexDocumentsConfig::default(); let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap(); - assert!(builder.add_documents(content).is_err()); + let (_builder, user_error) = builder.add_documents(content).unwrap(); + assert!(user_error.is_err()); wtxn.commit().unwrap(); // Check that there is no document. @@ -943,7 +944,8 @@ mod tests { let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ()) .unwrap(); - assert!(builder.add_documents(content).is_err()); + let (_builder, user_error) = builder.add_documents(content).unwrap(); + assert!(user_error.is_err()); wtxn.commit().unwrap(); // First we send 1 document with a valid id. diff --git a/milli/src/update/index_documents/validate.rs b/milli/src/update/index_documents/validate.rs index 0ed1f1cc0..0cb0b4aff 100644 --- a/milli/src/update/index_documents/validate.rs +++ b/milli/src/update/index_documents/validate.rs @@ -19,20 +19,20 @@ pub fn validate_documents_batch( reader: DocumentsBatchReader, ) -> Result, UserError>> { let mut cursor = reader.into_cursor(); - let documents_batch_index = cursor.documents_batch_index().clone(); + let mut documents_batch_index = cursor.documents_batch_index().clone(); // The primary key *field id* that has already been set for this index or the one // we will guess by searching for the first key that contains "id" as a substring. let (primary_key, primary_key_id) = match index.primary_key(rtxn)? { Some(primary_key) => match documents_batch_index.id(primary_key) { Some(id) => (primary_key, id), + None if autogenerate_docids => (primary_key, documents_batch_index.insert(primary_key)), None => { return match cursor.next_document()? { Some(first_document) => Ok(Err(UserError::MissingDocumentId { primary_key: primary_key.to_string(), document: obkv_to_object(&first_document, &documents_batch_index)?, })), - // If there is no document in this batch the best we can do is to return this error. None => Ok(Err(UserError::MissingPrimaryKey)), }; } @@ -40,10 +40,11 @@ pub fn validate_documents_batch( None => { let guessed = documents_batch_index .iter() - .filter(|(_, name)| name.contains("id")) + .filter(|(_, name)| name.to_lowercase().contains("id")) .min_by_key(|(fid, _)| *fid); match guessed { Some((id, name)) => (name.as_str(), *id), + None if autogenerate_docids => ("id", documents_batch_index.insert("id")), None => return Ok(Err(UserError::MissingPrimaryKey)), } } @@ -56,12 +57,16 @@ pub fn validate_documents_batch( _otherwise => None, }; + let mut count = 0; while let Some(document) = cursor.next_document()? { let document_id = match document.get(primary_key_id) { Some(document_id_bytes) => match validate_document_id_from_json(document_id_bytes)? { Ok(document_id) => document_id, Err(user_error) => return Ok(Err(user_error)), }, + None if autogenerate_docids => { + format!("{{auto-generated id of the {}nth document}}", count) + } None => { return Ok(Err(UserError::MissingDocumentId { primary_key: primary_key.to_string(), @@ -75,6 +80,7 @@ pub fn validate_documents_batch( return Ok(Err(UserError::from(user_error))); } } + count += 1; } Ok(Ok(cursor.into_reader()))