Merge #4151

4151: Bring back changes from v1.4.2 into `release-v1.5.0` r=dureuill a=curquiza This will bring the fixes in v1.4.2 for v1.5.0 release Co-authored-by: curquiza <curquiza@users.noreply.github.com> Co-authored-by: Vivek Kumar <vivek.26@outlook.com> Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
2025-07-03 20:07:09 +02:00 · 2023-10-23 10:11:11 +00:00 · 2023-10-23 10:11:11 +00:00 · e4c24ca6a3
commit e4c24ca6a3
parent eae9eab181 2bae9550c8
5 changed files with 231 additions and 51 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -468,7 +468,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"

 [[package]]
 name = "benchmarks"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
 "anyhow",
 "bytes",
@ -1206,7 +1206,7 @@ dependencies = [

 [[package]]
 name = "dump"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
 "anyhow",
 "big_s",
@ -1417,7 +1417,7 @@ dependencies = [

 [[package]]
 name = "file-store"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
 "faux",
 "tempfile",
@ -1439,7 +1439,7 @@ dependencies = [

 [[package]]
 name = "filter-parser"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
 "insta",
 "nom",
@ -1459,7 +1459,7 @@ dependencies = [

 [[package]]
 name = "flatten-serde-json"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
 "criterion",
 "serde_json",
@ -1577,7 +1577,7 @@ dependencies = [

 [[package]]
 name = "fuzzers"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
 "arbitrary",
 "clap",
@ -1891,7 +1891,7 @@ dependencies = [

 [[package]]
 name = "index-scheduler"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
 "anyhow",
 "big_s",
@ -2088,7 +2088,7 @@ dependencies = [

 [[package]]
 name = "json-depth-checker"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
 "criterion",
 "serde_json",
@ -2500,7 +2500,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

 [[package]]
 name = "meili-snap"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
 "insta",
 "md5",
@ -2509,7 +2509,7 @@ dependencies = [

 [[package]]
 name = "meilisearch"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
 "actix-cors",
 "actix-http",
@ -2599,7 +2599,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-auth"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
 "base64 0.21.2",
 "enum-iterator",
@ -2618,7 +2618,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-types"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
 "actix-web",
 "anyhow",
@ -2672,7 +2672,7 @@ dependencies = [

 [[package]]
 name = "milli"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
 "big_s",
 "bimap",
@ -2994,7 +2994,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"

 [[package]]
 name = "permissive-json-pointer"
-version = "1.4.1"
+version = "1.4.2"
 dependencies = [
 "big_s",
 "serde_json",
--- a/Cargo.toml
+++ b/Cargo.toml
@ -18,7 +18,7 @@ members = [
 ]

 [workspace.package]
-version = "1.4.1"
+version = "1.4.2"
 authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
 description = "Meilisearch HTTP server"
 homepage = "https://meilisearch.com"
--- a/meilisearch/tests/search/distinct.rs
+++ b/meilisearch/tests/search/distinct.rs
@ -6,21 +6,109 @@ use crate::json;

 pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
-        {"productId": 1, "shopId": 1},
-        {"productId": 2, "shopId": 1},
-        {"productId": 3, "shopId": 2},
-        {"productId": 4, "shopId": 2},
-        {"productId": 5, "shopId": 3},
-        {"productId": 6, "shopId": 3},
-        {"productId": 7, "shopId": 4},
-        {"productId": 8, "shopId": 4},
-        {"productId": 9, "shopId": 5},
-        {"productId": 10, "shopId": 5}
+      {
+        "id": 1,
+        "description": "Leather Jacket",
+        "brand": "Lee Jeans",
+        "product_id": "123456",
+        "color": "Brown"
+      },
+      {
+        "id": 2,
+        "description": "Leather Jacket",
+        "brand": "Lee Jeans",
+        "product_id": "123456",
+        "color": "Black"
+      },
+      {
+        "id": 3,
+        "description": "Leather Jacket",
+        "brand": "Lee Jeans",
+        "product_id": "123456",
+        "color": "Blue"
+      },
+      {
+        "id": 4,
+        "description": "T-Shirt",
+        "brand": "Nike",
+        "product_id": "789012",
+        "color": "Red"
+      },
+      {
+        "id": 5,
+        "description": "T-Shirt",
+        "brand": "Nike",
+        "product_id": "789012",
+        "color": "Blue"
+      },
+      {
+        "id": 6,
+        "description": "Running Shoes",
+        "brand": "Adidas",
+        "product_id": "456789",
+        "color": "Black"
+      },
+      {
+        "id": 7,
+        "description": "Running Shoes",
+        "brand": "Adidas",
+        "product_id": "456789",
+        "color": "White"
+      },
+      {
+        "id": 8,
+        "description": "Hoodie",
+        "brand": "Puma",
+        "product_id": "987654",
+        "color": "Gray"
+      },
+      {
+        "id": 9,
+        "description": "Sweater",
+        "brand": "Gap",
+        "product_id": "234567",
+        "color": "Green"
+      },
+      {
+        "id": 10,
+        "description": "Sweater",
+        "brand": "Gap",
+        "product_id": "234567",
+        "color": "Red"
+      },
+      {
+        "id": 11,
+        "description": "Sweater",
+        "brand": "Gap",
+        "product_id": "234567",
+        "color": "Blue"
+      },
+      {
+        "id": 12,
+        "description": "Jeans",
+        "brand": "Levi's",
+        "product_id": "345678",
+        "color": "Indigo"
+      },
+      {
+        "id": 13,
+        "description": "Jeans",
+        "brand": "Levi's",
+        "product_id": "345678",
+        "color": "Black"
+      },
+      {
+        "id": 14,
+        "description": "Jeans",
+        "brand": "Levi's",
+        "product_id": "345678",
+        "color": "Stone Wash"
+      }
    ])
 });

-pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
-pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";
+pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id";
+pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id";

 /// testing: https://github.com/meilisearch/meilisearch/issues/4078
 #[actix_rt::test]
@ -33,31 +121,121 @@ async fn distinct_search_with_offset_no_ranking() {
    index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
    index.wait_task(1).await;

-    fn get_hits(Value(response): Value) -> Vec<i64> {
+    fn get_hits(response: &Value) -> Vec<&str> {
        let hits_array = response["hits"].as_array().unwrap();
-        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
+        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
    }

-    let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
-    let hits = get_hits(response);
+    let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
+    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @"[1, 2]");
+    snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
+    snapshot!(response["estimatedTotalHits"] , @"11");

-    let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
-    let hits = get_hits(response);
+    let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
+    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @"[3, 4]");
+    snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
+    snapshot!(response["estimatedTotalHits"], @"10");

-    let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
-    let hits = get_hits(response);
+    let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
+    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"1");
-    snapshot!(format!("{:?}", hits), @"[5]");
+    snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");

-    let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
-    let hits = get_hits(response);
+    let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
+    let hits = get_hits(&response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");
+}
+
+/// testing: https://github.com/meilisearch/meilisearch/issues/4130
+#[actix_rt::test]
+async fn distinct_search_with_pagination_no_ranking() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
+    index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
+    index.wait_task(1).await;
+
+    fn get_hits(response: &Value) -> Vec<&str> {
+        let hits_array = response["hits"].as_array().unwrap();
+        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
+    }
+
+    let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["page"], @"0");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
+    snapshot!(response["page"], @"1");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
+    snapshot!(response["page"], @"2");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
+    snapshot!(response["page"], @"3");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["page"], @"4");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"3");
+    snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
+    snapshot!(response["page"], @"2");
+    snapshot!(response["totalPages"], @"2");
+    snapshot!(response["totalHits"], @"6");
 }
--- a/milli/src/search/new/bucket_sort.rs
+++ b/milli/src/search/new/bucket_sort.rs
@ -46,9 +46,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
        if let Some(distinct_fid) = distinct_fid {
            let mut excluded = RoaringBitmap::new();
            let mut results = vec![];
-            let mut skip = 0;
            for docid in universe.iter() {
-                if results.len() >= length {
+                if results.len() >= from + length {
                    break;
                }
                if excluded.contains(docid) {
@ -56,16 +55,19 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
                }

                distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
-                skip += 1;
-                if skip <= from {
-                    continue;
-                }
-
                results.push(docid);
            }

            let mut all_candidates = universe - excluded;
            all_candidates.extend(results.iter().copied());
+            // drain the results of the skipped elements
+            // this **must** be done **after** writing the entire results in `all_candidates` to ensure
+            // e.g. estimatedTotalHits is correct.
+            if results.len() >= from {
+                results.drain(..from);
+            } else {
+                results.clear();
+            }

            return Ok(BucketSortOutput {
                scores: vec![Default::default(); results.len()],
--- a/milli/tests/search/distinct.rs
+++ b/milli/tests/search/distinct.rs
@ -202,7 +202,7 @@ test_distinct!(
    EXTERNAL_DOCUMENTS_IDS.len(),
    1,
    vec![],
-    2
+    3
 );
 test_distinct!(
    // testing: https://github.com/meilisearch/meilisearch/issues/4078
@ -212,7 +212,7 @@ test_distinct!(
    1,
    2,
    vec![],
-    1
+    3
 );
 test_distinct!(
    // testing: https://github.com/meilisearch/meilisearch/issues/4078
@ -222,7 +222,7 @@ test_distinct!(
    EXTERNAL_DOCUMENTS_IDS.len(),
    2,
    vec![],
-    5
+    7
 );
 test_distinct!(
    // testing: https://github.com/meilisearch/meilisearch/issues/4078
@ -232,5 +232,5 @@ test_distinct!(
    2,
    4,
    vec![],
-    3
+    7
 );