From 313c36246159f185c6e2734aa7294e047babee91 Mon Sep 17 00:00:00 2001
From: Marin Postma <postma.marin@protonmail.com>
Date: Thu, 6 May 2021 18:14:16 +0200
Subject: [PATCH 1/3] early return on empty document addition

---
 milli/src/update/index_documents/mod.rs | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index 3acae7821..a9ebcd20a 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -1,7 +1,7 @@
 use std::borrow::Cow;
 use std::collections::HashSet;
 use std::fs::File;
-use std::io::{self, Seek, SeekFrom};
+use std::io::{self, Seek, SeekFrom, BufReader, BufRead};
 use std::num::{NonZeroU32, NonZeroUsize};
 use std::str;
 use std::sync::mpsc::sync_channel;
@@ -326,6 +326,16 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
         R: io::Read,
         F: Fn(UpdateIndexingStep, u64) + Sync,
     {
+        let mut reader = BufReader::new(reader);
+        reader.fill_buf()?;
+
+        // Early return when there are no document to add
+        if reader.buffer().is_empty() {
+            return Ok(DocumentAdditionResult {
+                nb_documents: 0,
+            })
+        }
+
         self.index.set_updated_at(self.wtxn, &Utc::now())?;
         let before_transform = Instant::now();
         let update_id = self.update_id;

From eeb0c70ea2a3f78038d98c20c82a789390dc3319 Mon Sep 17 00:00:00 2001
From: Marin Postma <postma.marin@protonmail.com>
Date: Thu, 6 May 2021 21:16:40 +0200
Subject: [PATCH 2/3] meilisearch compatible primary key inference

---
 milli/src/update/index_documents/mod.rs       | 2 +-
 milli/src/update/index_documents/transform.rs | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index a9ebcd20a..82f494591 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -329,7 +329,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
         let mut reader = BufReader::new(reader);
         reader.fill_buf()?;
 
-        // Early return when there are no document to add
+        // Early return when there is no document to add
         if reader.buffer().is_empty() {
             return Ok(DocumentAdditionResult {
                 nb_documents: 0,
diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs
index e029a5135..ced5fe2c7 100644
--- a/milli/src/update/index_documents/transform.rs
+++ b/milli/src/update/index_documents/transform.rs
@@ -47,6 +47,10 @@ pub struct Transform<'t, 'i> {
     pub autogenerate_docids: bool,
 }
 
+fn is_primary_key(field: impl AsRef<str>) -> bool { // does `field` contain the default key name?
+    field.as_ref().to_lowercase().contains(DEFAULT_PRIMARY_KEY_NAME) // lowercase, then substring test
+}
+
 impl Transform<'_, '_> {
     pub fn output_from_json<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<TransformOutput>
     where
@@ -92,7 +96,7 @@ impl Transform<'_, '_> {
         // We extract the primary key from the first document in
         // the batch if it hasn't already been defined in the index
         let first = documents.peek().and_then(|r| r.as_ref().ok());
-        let alternative_name = first.and_then(|doc| doc.keys().find(|k| k.contains(DEFAULT_PRIMARY_KEY_NAME)).cloned());
+        let alternative_name = first.and_then(|doc| doc.keys().find(|f| is_primary_key(f)).cloned());
         let (primary_key_id, primary_key) = compute_primary_key_pair(
             self.index.primary_key(self.rtxn)?,
             &mut fields_ids_map,
@@ -232,7 +236,7 @@ impl Transform<'_, '_> {
                // The primary key is known so we must find the position in the CSV headers.
                headers.iter().position(|h| h == primary_key)
             },
-            None => headers.iter().position(|h| h.contains("id")),
+            None => headers.iter().position(|f| is_primary_key(&f)),
         };
 
         // Returns the field id in the fields ids map, create an "id" field

From 57898d8a907b939f4bef661c3a7e3e9db58745b9 Mon Sep 17 00:00:00 2001
From: marin postma <postma.marin@protonmail.com>
Date: Wed, 2 Jun 2021 19:05:12 +0200
Subject: [PATCH 3/3] fix silent deserialize error

---
 milli/src/update/index_documents/transform.rs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs
index ced5fe2c7..fd508d6a4 100644
--- a/milli/src/update/index_documents/transform.rs
+++ b/milli/src/update/index_documents/transform.rs
@@ -95,7 +95,11 @@ impl Transform<'_, '_> {
 
         // We extract the primary key from the first document in
         // the batch if it hasn't already been defined in the index
-        let first = documents.peek().and_then(|r| r.as_ref().ok());
+        let first = match documents.peek().map(Result::as_ref).transpose() {
+            Ok(first) => first,
+            Err(_) => return Err(documents.next().unwrap().unwrap_err().into()),
+        };
+
         let alternative_name = first.and_then(|doc| doc.keys().find(|f| is_primary_key(f)).cloned());
         let (primary_key_id, primary_key) = compute_primary_key_pair(
             self.index.primary_key(self.rtxn)?,
@@ -236,7 +240,7 @@ impl Transform<'_, '_> {
                // The primary key is known so we must find the position in the CSV headers.
                headers.iter().position(|h| h == primary_key)
             },
-            None => headers.iter().position(|f| is_primary_key(&f)),
+            None => headers.iter().position(is_primary_key),
         };
 
         // Returns the field id in the fields ids map, create an "id" field