From eeb0c70ea2a3f78038d98c20c82a789390dc3319 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Thu, 6 May 2021 21:16:40 +0200 Subject: [PATCH] meilisearch compatible primary key inference --- milli/src/update/index_documents/mod.rs | 2 +- milli/src/update/index_documents/transform.rs | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index a9ebcd20a..82f494591 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -329,7 +329,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { let mut reader = BufReader::new(reader); reader.fill_buf()?; - // Early return when there are no document to add + // Early return when there is no document to add if reader.buffer().is_empty() { return Ok(DocumentAdditionResult { nb_documents: 0, diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index e029a5135..ced5fe2c7 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -47,6 +47,10 @@ pub struct Transform<'t, 'i> { pub autogenerate_docids: bool, } +fn is_primary_key(field: impl AsRef<str>) -> bool { + field.as_ref().to_lowercase().contains(DEFAULT_PRIMARY_KEY_NAME) +} + impl Transform<'_, '_> { pub fn output_from_json<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result where @@ -92,7 +96,7 @@ impl Transform<'_, '_> { // We extract the primary key from the first document in // the batch if it hasn't already been defined in the index let first = documents.peek().and_then(|r| r.as_ref().ok()); - let alternative_name = first.and_then(|doc| doc.keys().find(|k| k.contains(DEFAULT_PRIMARY_KEY_NAME)).cloned()); + let alternative_name = first.and_then(|doc| doc.keys().find(|f| is_primary_key(f)).cloned()); let (primary_key_id, primary_key) = compute_primary_key_pair( self.index.primary_key(self.rtxn)?, &mut 
fields_ids_map, @@ -232,7 +236,7 @@ impl Transform<'_, '_> { // The primary key is known so we must find the position in the CSV headers. headers.iter().position(|h| h == primary_key) }, - None => headers.iter().position(|h| h.contains("id")), + None => headers.iter().position(|f| is_primary_key(&f)), }; // Returns the field id in the fields ids map, create an "id" field