From 2042229927dffcbc9fddb55168d8955842efcbfd Mon Sep 17 00:00:00 2001
From: curquiza <curquiza@users.noreply.github.com>
Date: Thu, 19 Oct 2023 08:50:11 +0000
Subject: [PATCH 1/4] Update version for the next release (v1.4.2) in
 Cargo.toml

---
 Cargo.lock | 28 ++++++++++++++--------------
 Cargo.toml |  2 +-
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index b3991d130..aa7df19ed 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -468,7 +468,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
 
 [[package]]
 name = "benchmarks"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
  "anyhow",
  "bytes",
@@ -1206,7 +1206,7 @@ dependencies = [
 
 [[package]]
 name = "dump"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
  "anyhow",
  "big_s",
@@ -1417,7 +1417,7 @@ dependencies = [
 
 [[package]]
 name = "file-store"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
  "faux",
  "tempfile",
@@ -1439,7 +1439,7 @@ dependencies = [
 
 [[package]]
 name = "filter-parser"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
  "insta",
  "nom",
@@ -1459,7 +1459,7 @@ dependencies = [
 
 [[package]]
 name = "flatten-serde-json"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
  "criterion",
  "serde_json",
@@ -1577,7 +1577,7 @@ dependencies = [
 
 [[package]]
 name = "fuzzers"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
  "arbitrary",
  "clap",
@@ -1891,7 +1891,7 @@ dependencies = [
 
 [[package]]
 name = "index-scheduler"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
  "anyhow",
  "big_s",
@@ -2088,7 +2088,7 @@ dependencies = [
 
 [[package]]
 name = "json-depth-checker"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
  "criterion",
  "serde_json",
@@ -2500,7 +2500,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
 
 [[package]]
 name = "meili-snap"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
  "insta",
  "md5",
@@ -2509,7 +2509,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
  "actix-cors",
  "actix-http",
@@ -2599,7 +2599,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-auth"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
  "base64 0.21.2",
  "enum-iterator",
@@ -2618,7 +2618,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-types"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
  "actix-web",
  "anyhow",
@@ -2672,7 +2672,7 @@ dependencies = [
 
 [[package]]
 name = "milli"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
  "big_s",
  "bimap",
@@ -2994,7 +2994,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
 
 [[package]]
 name = "permissive-json-pointer"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
  "big_s",
  "serde_json",
diff --git a/Cargo.toml b/Cargo.toml
index 05c7b1012..a40af10f7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,7 +18,7 @@ members = [
 ]
 
 [workspace.package]
-version = "1.4.1"
+version = "1.4.2"
 authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
 description = "Meilisearch HTTP server"
 homepage = "https://meilisearch.com"

From 5fe7c4545a57d7356d25668e7ecbdcf1616082b2 Mon Sep 17 00:00:00 2001
From: Vivek Kumar <vivek.26@outlook.com>
Date: Thu, 19 Oct 2023 16:48:45 +0530
Subject: [PATCH 2/4] compute all candidates correctly when skipping

---
 milli/src/search/new/bucket_sort.rs | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs
index cf2f08cce..df9c14c7d 100644
--- a/milli/src/search/new/bucket_sort.rs
+++ b/milli/src/search/new/bucket_sort.rs
@@ -46,9 +46,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
         if let Some(distinct_fid) = distinct_fid {
             let mut excluded = RoaringBitmap::new();
             let mut results = vec![];
-            let mut skip = 0;
             for docid in universe.iter() {
-                if results.len() >= length {
+                if results.len() >= from + length {
                     break;
                 }
                 if excluded.contains(docid) {
@@ -56,16 +55,16 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
                 }
 
                 distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
-                skip += 1;
-                if skip <= from {
-                    continue;
-                }
-
                 results.push(docid);
             }
 
             let mut all_candidates = universe - excluded;
             all_candidates.extend(results.iter().copied());
+            if results.len() >= from {
+                results.drain(..from);
+            } else {
+                results.clear();
+            }
 
             return Ok(BucketSortOutput {
                 scores: vec![Default::default(); results.len()],

From 32c78ac8b1ccccf2a41639c159c11ff3eee46a00 Mon Sep 17 00:00:00 2001
From: Vivek Kumar <vivek.26@outlook.com>
Date: Thu, 19 Oct 2023 16:50:14 +0530
Subject: [PATCH 3/4] add/update tests when search with distinct attribute &
 pagination with no ranking

---
 meilisearch/tests/search/distinct.rs | 228 ++++++++++++++++++++++++---
 milli/tests/search/distinct.rs       |   8 +-
 2 files changed, 207 insertions(+), 29 deletions(-)

diff --git a/meilisearch/tests/search/distinct.rs b/meilisearch/tests/search/distinct.rs
index 93c5197a6..14ce88da2 100644
--- a/meilisearch/tests/search/distinct.rs
+++ b/meilisearch/tests/search/distinct.rs
@@ -6,21 +6,109 @@ use crate::json;
 
 pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
     json!([
-        {"productId": 1, "shopId": 1},
-        {"productId": 2, "shopId": 1},
-        {"productId": 3, "shopId": 2},
-        {"productId": 4, "shopId": 2},
-        {"productId": 5, "shopId": 3},
-        {"productId": 6, "shopId": 3},
-        {"productId": 7, "shopId": 4},
-        {"productId": 8, "shopId": 4},
-        {"productId": 9, "shopId": 5},
-        {"productId": 10, "shopId": 5}
+      {
+        "id": 1,
+        "description": "Leather Jacket",
+        "brand": "Lee Jeans",
+        "product_id": "123456",
+        "color": "Brown"
+      },
+      {
+        "id": 2,
+        "description": "Leather Jacket",
+        "brand": "Lee Jeans",
+        "product_id": "123456",
+        "color": "Black"
+      },
+      {
+        "id": 3,
+        "description": "Leather Jacket",
+        "brand": "Lee Jeans",
+        "product_id": "123456",
+        "color": "Blue"
+      },
+      {
+        "id": 4,
+        "description": "T-Shirt",
+        "brand": "Nike",
+        "product_id": "789012",
+        "color": "Red"
+      },
+      {
+        "id": 5,
+        "description": "T-Shirt",
+        "brand": "Nike",
+        "product_id": "789012",
+        "color": "Blue"
+      },
+      {
+        "id": 6,
+        "description": "Running Shoes",
+        "brand": "Adidas",
+        "product_id": "456789",
+        "color": "Black"
+      },
+      {
+        "id": 7,
+        "description": "Running Shoes",
+        "brand": "Adidas",
+        "product_id": "456789",
+        "color": "White"
+      },
+      {
+        "id": 8,
+        "description": "Hoodie",
+        "brand": "Puma",
+        "product_id": "987654",
+        "color": "Gray"
+      },
+      {
+        "id": 9,
+        "description": "Sweater",
+        "brand": "Gap",
+        "product_id": "234567",
+        "color": "Green"
+      },
+      {
+        "id": 10,
+        "description": "Sweater",
+        "brand": "Gap",
+        "product_id": "234567",
+        "color": "Red"
+      },
+      {
+        "id": 11,
+        "description": "Sweater",
+        "brand": "Gap",
+        "product_id": "234567",
+        "color": "Blue"
+      },
+      {
+        "id": 12,
+        "description": "Jeans",
+        "brand": "Levi's",
+        "product_id": "345678",
+        "color": "Indigo"
+      },
+      {
+        "id": 13,
+        "description": "Jeans",
+        "brand": "Levi's",
+        "product_id": "345678",
+        "color": "Black"
+      },
+      {
+        "id": 14,
+        "description": "Jeans",
+        "brand": "Levi's",
+        "product_id": "345678",
+        "color": "Stone Wash"
+      }
     ])
 });
 
-pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
-pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";
+pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id";
+pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id";
 
 /// testing: https://github.com/meilisearch/meilisearch/issues/4078
 #[actix_rt::test]
@@ -33,31 +121,121 @@ async fn distinct_search_with_offset_no_ranking() {
     index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
     index.wait_task(1).await;
 
-    fn get_hits(Value(response): Value) -> Vec<i64> {
+    fn get_hits(response: &Value) -> Vec<&str> {
         let hits_array = response["hits"].as_array().unwrap();
-        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
+        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
     }
 
-    let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
-    let hits = get_hits(response);
+    let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
+    let hits = get_hits(&response);
     snapshot!(code, @"200 OK");
     snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @"[1, 2]");
+    snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
+    snapshot!(response["estimatedTotalHits"] , @"11");
 
-    let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
-    let hits = get_hits(response);
+    let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
+    let hits = get_hits(&response);
     snapshot!(code, @"200 OK");
     snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @"[3, 4]");
+    snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
+    snapshot!(response["estimatedTotalHits"], @"10");
 
-    let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
-    let hits = get_hits(response);
+    let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
+    let hits = get_hits(&response);
     snapshot!(code, @"200 OK");
     snapshot!(hits.len(), @"1");
-    snapshot!(format!("{:?}", hits), @"[5]");
+    snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");
 
-    let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
-    let hits = get_hits(response);
+    let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
+    let hits = get_hits(&response);
     snapshot!(code, @"200 OK");
     snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");
+}
+
+/// testing: https://github.com/meilisearch/meilisearch/issues/4130
+#[actix_rt::test]
+async fn distinct_search_with_pagination_no_ranking() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
+    index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
+    index.wait_task(1).await;
+
+    fn get_hits(response: &Value) -> Vec<&str> {
+        let hits_array = response["hits"].as_array().unwrap();
+        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
+    }
+
+    let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["page"], @"0");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
+    snapshot!(response["page"], @"1");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
+    snapshot!(response["page"], @"2");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
+    snapshot!(response["page"], @"3");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["page"], @"4");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"3");
+    snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
+    snapshot!(response["page"], @"2");
+    snapshot!(response["totalPages"], @"2");
+    snapshot!(response["totalHits"], @"6");
 }
diff --git a/milli/tests/search/distinct.rs b/milli/tests/search/distinct.rs
index e1876286c..fc890dfe8 100644
--- a/milli/tests/search/distinct.rs
+++ b/milli/tests/search/distinct.rs
@@ -202,7 +202,7 @@ test_distinct!(
     EXTERNAL_DOCUMENTS_IDS.len(),
     1,
     vec![],
-    2
+    3
 );
 test_distinct!(
     // testing: https://github.com/meilisearch/meilisearch/issues/4078
@@ -212,7 +212,7 @@ test_distinct!(
     1,
     2,
     vec![],
-    1
+    3
 );
 test_distinct!(
     // testing: https://github.com/meilisearch/meilisearch/issues/4078
@@ -222,7 +222,7 @@ test_distinct!(
     EXTERNAL_DOCUMENTS_IDS.len(),
     2,
     vec![],
-    5
+    7
 );
 test_distinct!(
     // testing: https://github.com/meilisearch/meilisearch/issues/4078
@@ -232,5 +232,5 @@ test_distinct!(
     2,
     4,
     vec![],
-    3
+    7
 );

From 2bae9550c8f1f5874c4e1bab5c3fe767dce416ec Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis.dureuil@gmail.com>
Date: Thu, 19 Oct 2023 15:58:25 +0200
Subject: [PATCH 4/4] Add explanatory comment

---
 milli/src/search/new/bucket_sort.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs
index df9c14c7d..b46f6124f 100644
--- a/milli/src/search/new/bucket_sort.rs
+++ b/milli/src/search/new/bucket_sort.rs
@@ -60,6 +60,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
 
             let mut all_candidates = universe - excluded;
             all_candidates.extend(results.iter().copied());
+            // drain the results of the skipped elements
+            // this **must** be done **after** writing the entire results in `all_candidates` to ensure
+            // e.g. estimatedTotalHits is correct.
             if results.len() >= from {
                 results.drain(..from);
             } else {