From 313c36246159f185c6e2734aa7294e047babee91 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Thu, 6 May 2021 18:14:16 +0200 Subject: [PATCH 1/3] early return on empty document addition --- milli/src/update/index_documents/mod.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 3acae7821..a9ebcd20a 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -1,7 +1,7 @@ use std::borrow::Cow; use std::collections::HashSet; use std::fs::File; -use std::io::{self, Seek, SeekFrom}; +use std::io::{self, Seek, SeekFrom, BufReader, BufRead}; use std::num::{NonZeroU32, NonZeroUsize}; use std::str; use std::sync::mpsc::sync_channel; @@ -326,6 +326,16 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { R: io::Read, F: Fn(UpdateIndexingStep, u64) + Sync, { + let mut reader = BufReader::new(reader); + reader.fill_buf()?; + + // Early return when there are no document to add + if reader.buffer().is_empty() { + return Ok(DocumentAdditionResult { + nb_documents: 0, + }) + } + self.index.set_updated_at(self.wtxn, &Utc::now())?; let before_transform = Instant::now(); let update_id = self.update_id; From eeb0c70ea2a3f78038d98c20c82a789390dc3319 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Thu, 6 May 2021 21:16:40 +0200 Subject: [PATCH 2/3] meilisearch compatible primary key inference --- milli/src/update/index_documents/mod.rs | 2 +- milli/src/update/index_documents/transform.rs | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index a9ebcd20a..82f494591 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -329,7 +329,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { let mut reader = BufReader::new(reader); reader.fill_buf()?; - // Early return when 
there are no document to add + // Early return when there is no document to add if reader.buffer().is_empty() { return Ok(DocumentAdditionResult { nb_documents: 0, diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index e029a5135..ced5fe2c7 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -47,6 +47,10 @@ pub struct Transform<'t, 'i> { pub autogenerate_docids: bool, } +fn is_primary_key(field: impl AsRef<str>) -> bool { + field.as_ref().to_lowercase().contains(DEFAULT_PRIMARY_KEY_NAME) +} + impl Transform<'_, '_> { pub fn output_from_json<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result where @@ -92,7 +96,7 @@ impl Transform<'_, '_> { // We extract the primary key from the first document in // the batch if it hasn't already been defined in the index let first = documents.peek().and_then(|r| r.as_ref().ok()); - let alternative_name = first.and_then(|doc| doc.keys().find(|k| k.contains(DEFAULT_PRIMARY_KEY_NAME)).cloned()); + let alternative_name = first.and_then(|doc| doc.keys().find(|f| is_primary_key(f)).cloned()); let (primary_key_id, primary_key) = compute_primary_key_pair( self.index.primary_key(self.rtxn)?, &mut fields_ids_map, @@ -232,7 +236,7 @@ impl Transform<'_, '_> { // The primary key is known so we must find the position in the CSV headers. 
headers.iter().position(|h| h == primary_key) }, - None => headers.iter().position(|h| h.contains("id")), + None => headers.iter().position(|f| is_primary_key(&f)), }; // Returns the field id in the fields ids map, create an "id" field From 57898d8a907b939f4bef661c3a7e3e9db58745b9 Mon Sep 17 00:00:00 2001 From: marin postma Date: Wed, 2 Jun 2021 19:05:12 +0200 Subject: [PATCH 3/3] fix silent deserialize error --- milli/src/update/index_documents/transform.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index ced5fe2c7..fd508d6a4 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -95,7 +95,11 @@ impl Transform<'_, '_> { // We extract the primary key from the first document in // the batch if it hasn't already been defined in the index - let first = documents.peek().and_then(|r| r.as_ref().ok()); + let first = match documents.peek().map(Result::as_ref).transpose() { + Ok(first) => first, + Err(_) => return Err(documents.next().unwrap().unwrap_err().into()), + }; + let alternative_name = first.and_then(|doc| doc.keys().find(|f| is_primary_key(f)).cloned()); let (primary_key_id, primary_key) = compute_primary_key_pair( self.index.primary_key(self.rtxn)?, @@ -236,7 +240,7 @@ impl Transform<'_, '_> { // The primary key is known so we must find the position in the CSV headers. headers.iter().position(|h| h == primary_key) }, - None => headers.iter().position(|f| is_primary_key(&f)), + None => headers.iter().position(is_primary_key), }; // Returns the field id in the fields ids map, create an "id" field