From 2042229927dffcbc9fddb55168d8955842efcbfd Mon Sep 17 00:00:00 2001 From: curquiza Date: Thu, 19 Oct 2023 08:50:11 +0000 Subject: [PATCH 1/4] Update version for the next release (v1.4.2) in Cargo.toml --- Cargo.lock | 28 ++++++++++++++-------------- Cargo.toml | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b3991d130..aa7df19ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -468,7 +468,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "benchmarks" -version = "1.4.1" +version = "1.4.2" dependencies = [ "anyhow", "bytes", @@ -1206,7 +1206,7 @@ dependencies = [ [[package]] name = "dump" -version = "1.4.1" +version = "1.4.2" dependencies = [ "anyhow", "big_s", @@ -1417,7 +1417,7 @@ dependencies = [ [[package]] name = "file-store" -version = "1.4.1" +version = "1.4.2" dependencies = [ "faux", "tempfile", @@ -1439,7 +1439,7 @@ dependencies = [ [[package]] name = "filter-parser" -version = "1.4.1" +version = "1.4.2" dependencies = [ "insta", "nom", @@ -1459,7 +1459,7 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "1.4.1" +version = "1.4.2" dependencies = [ "criterion", "serde_json", @@ -1577,7 +1577,7 @@ dependencies = [ [[package]] name = "fuzzers" -version = "1.4.1" +version = "1.4.2" dependencies = [ "arbitrary", "clap", @@ -1891,7 +1891,7 @@ dependencies = [ [[package]] name = "index-scheduler" -version = "1.4.1" +version = "1.4.2" dependencies = [ "anyhow", "big_s", @@ -2088,7 +2088,7 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "1.4.1" +version = "1.4.2" dependencies = [ "criterion", "serde_json", @@ -2500,7 +2500,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "meili-snap" -version = "1.4.1" +version = "1.4.2" dependencies = [ "insta", "md5", @@ -2509,7 +2509,7 @@ dependencies = [ [[package]] name = "meilisearch" -version = "1.4.1" +version = "1.4.2" dependencies = [ "actix-cors", "actix-http", @@ -2599,7 +2599,7 @@ dependencies = [ [[package]] name = "meilisearch-auth" -version = "1.4.1" +version = "1.4.2" dependencies = [ "base64 0.21.2", "enum-iterator", @@ -2618,7 +2618,7 @@ dependencies = [ [[package]] name = "meilisearch-types" -version = "1.4.1" +version = "1.4.2" dependencies = [ "actix-web", "anyhow", @@ -2672,7 +2672,7 @@ dependencies = [ [[package]] name = "milli" -version = "1.4.1" +version = "1.4.2" dependencies = [ "big_s", "bimap", @@ -2994,7 +2994,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "permissive-json-pointer" -version = "1.4.1" +version = "1.4.2" dependencies = [ "big_s", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 05c7b1012..a40af10f7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ members = [ ] [workspace.package] -version = "1.4.1" +version = "1.4.2" authors = ["Quentin de Quelen ", "Clément Renault "] description = "Meilisearch HTTP server" homepage = "https://meilisearch.com" From 5fe7c4545a57d7356d25668e7ecbdcf1616082b2 Mon Sep 17 00:00:00 2001 From: Vivek Kumar Date: Thu, 19 Oct 2023 16:48:45 +0530 Subject: [PATCH 2/4] compute all candidates correctly when skipping --- milli/src/search/new/bucket_sort.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index cf2f08cce..df9c14c7d 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -46,9 +46,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( if let Some(distinct_fid) = distinct_fid { let mut excluded = RoaringBitmap::new(); let mut results = vec![]; - let mut skip = 0; for docid in universe.iter() { - if results.len() >= length { + if results.len() >= from + length { break; } if excluded.contains(docid) { @@ -56,16 +55,16 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( } distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?; - skip += 1; - if skip <= from { - continue; - } - results.push(docid); } let mut all_candidates = universe - excluded; all_candidates.extend(results.iter().copied()); + if results.len() >= from { + results.drain(..from); + } else { + results.clear(); + } return Ok(BucketSortOutput { scores: vec![Default::default(); results.len()], From 32c78ac8b1ccccf2a41639c159c11ff3eee46a00 Mon Sep 17 00:00:00 2001 From: Vivek Kumar Date: Thu, 19 Oct 2023 16:50:14 +0530 Subject: [PATCH 3/4] add/update tests when search with distinct attribute & pagination with no ranking --- meilisearch/tests/search/distinct.rs | 228 ++++++++++++++++++++++++--- milli/tests/search/distinct.rs | 8 +- 2 files changed, 207 insertions(+), 29 deletions(-) diff --git a/meilisearch/tests/search/distinct.rs b/meilisearch/tests/search/distinct.rs index 93c5197a6..14ce88da2 100644 --- a/meilisearch/tests/search/distinct.rs +++ b/meilisearch/tests/search/distinct.rs @@ -6,21 +6,109 @@ use crate::json; pub(self) static DOCUMENTS: Lazy = Lazy::new(|| { json!([ - {"productId": 1, "shopId": 1}, - {"productId": 2, "shopId": 1}, - {"productId": 3, "shopId": 2}, - {"productId": 4, "shopId": 2}, - {"productId": 5, "shopId": 3}, - {"productId": 6, "shopId": 3}, - {"productId": 7, "shopId": 4}, - {"productId": 8, "shopId": 4}, - {"productId": 9, "shopId": 5}, - {"productId": 10, "shopId": 5} + { + "id": 1, + "description": "Leather Jacket", + "brand": "Lee Jeans", + "product_id": "123456", + "color": "Brown" + }, + { + "id": 2, + "description": "Leather Jacket", + "brand": "Lee Jeans", + "product_id": "123456", + "color": "Black" + }, + { + "id": 3, + "description": "Leather Jacket", + "brand": "Lee Jeans", + "product_id": "123456", + "color": "Blue" + }, + { + "id": 4, + "description": "T-Shirt", + "brand": "Nike", + "product_id": "789012", + "color": "Red" + }, + { + "id": 5, + "description": "T-Shirt", + "brand": "Nike", + "product_id": "789012", + "color": "Blue" + }, + { + "id": 6, + "description": "Running Shoes", + "brand": "Adidas", + "product_id": "456789", + "color": "Black" + }, + { + "id": 7, + "description": "Running Shoes", + "brand": "Adidas", + "product_id": "456789", + "color": "White" + }, + { + "id": 8, + "description": "Hoodie", + "brand": "Puma", + "product_id": "987654", + "color": "Gray" + }, + { + "id": 9, + "description": "Sweater", + "brand": "Gap", + "product_id": "234567", + "color": "Green" + }, + { + "id": 10, + "description": "Sweater", + "brand": "Gap", + "product_id": "234567", + "color": "Red" + }, + { + "id": 11, + "description": "Sweater", + "brand": "Gap", + "product_id": "234567", + "color": "Blue" + }, + { + "id": 12, + "description": "Jeans", + "brand": "Levi's", + "product_id": "345678", + "color": "Indigo" + }, + { + "id": 13, + "description": "Jeans", + "brand": "Levi's", + "product_id": "345678", + "color": "Black" + }, + { + "id": 14, + "description": "Jeans", + "brand": "Levi's", + "product_id": "345678", + "color": "Stone Wash" + } ]) }); -pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId"; -pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId"; +pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id"; +pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id"; /// testing: https://github.com/meilisearch/meilisearch/issues/4078 #[actix_rt::test] @@ -33,31 +121,121 @@ async fn distinct_search_with_offset_no_ranking() { index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await; index.wait_task(1).await; - fn get_hits(Value(response): Value) -> Vec { + fn get_hits(response: &Value) -> Vec<&str> { let hits_array = response["hits"].as_array().unwrap(); - hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::>() + hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::>() } - let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await; - let hits = get_hits(response); + let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await; + let hits = get_hits(&response); snapshot!(code, @"200 OK"); snapshot!(hits.len(), @"2"); - snapshot!(format!("{:?}", hits), @"[1, 2]"); + snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#); + snapshot!(response["estimatedTotalHits"] , @"11"); - let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await; - let hits = get_hits(response); + let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await; + let hits = get_hits(&response); snapshot!(code, @"200 OK"); snapshot!(hits.len(), @"2"); - snapshot!(format!("{:?}", hits), @"[3, 4]"); + snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#); + snapshot!(response["estimatedTotalHits"], @"10"); - let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await; - let hits = get_hits(response); + let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"2"); + snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#); + snapshot!(response["estimatedTotalHits"], @"6"); + + let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await; + let hits = get_hits(&response); snapshot!(code, @"200 OK"); snapshot!(hits.len(), @"1"); - snapshot!(format!("{:?}", hits), @"[5]"); + snapshot!(format!("{:?}", hits), @r#"["345678"]"#); + snapshot!(response["estimatedTotalHits"], @"6"); - let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await; - let hits = get_hits(response); + let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await; + let hits = get_hits(&response); snapshot!(code, @"200 OK"); snapshot!(hits.len(), @"0"); + snapshot!(format!("{:?}", hits), @r#"[]"#); + snapshot!(response["estimatedTotalHits"], @"6"); + + let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"0"); + snapshot!(format!("{:?}", hits), @r#"[]"#); + snapshot!(response["estimatedTotalHits"], @"6"); +} + +/// testing: https://github.com/meilisearch/meilisearch/issues/4130 +#[actix_rt::test] +async fn distinct_search_with_pagination_no_ranking() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await; + index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await; + index.wait_task(1).await; + + fn get_hits(response: &Value) -> Vec<&str> { + let hits_array = response["hits"].as_array().unwrap(); + hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::>() + } + + let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"0"); + snapshot!(format!("{:?}", hits), @r#"[]"#); + snapshot!(response["page"], @"0"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); + + let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"2"); + snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#); + snapshot!(response["page"], @"1"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); + + let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"2"); + snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#); + snapshot!(response["page"], @"2"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); + + let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"2"); + snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#); + snapshot!(response["page"], @"3"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); + + let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"0"); + snapshot!(format!("{:?}", hits), @r#"[]"#); + snapshot!(response["page"], @"4"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); + + let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"3"); + snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#); + snapshot!(response["page"], @"2"); + snapshot!(response["totalPages"], @"2"); + snapshot!(response["totalHits"], @"6"); } diff --git a/milli/tests/search/distinct.rs b/milli/tests/search/distinct.rs index e1876286c..fc890dfe8 100644 --- a/milli/tests/search/distinct.rs +++ b/milli/tests/search/distinct.rs @@ -202,7 +202,7 @@ test_distinct!( EXTERNAL_DOCUMENTS_IDS.len(), 1, vec![], - 2 + 3 ); test_distinct!( // testing: https://github.com/meilisearch/meilisearch/issues/4078 @@ -212,7 +212,7 @@ test_distinct!( 1, 2, vec![], - 1 + 3 ); test_distinct!( // testing: https://github.com/meilisearch/meilisearch/issues/4078 @@ -222,7 +222,7 @@ test_distinct!( EXTERNAL_DOCUMENTS_IDS.len(), 2, vec![], - 5 + 7 ); test_distinct!( // testing: https://github.com/meilisearch/meilisearch/issues/4078 @@ -232,5 +232,5 @@ test_distinct!( 2, 4, vec![], - 3 + 7 ); From 2bae9550c8f1f5874c4e1bab5c3fe767dce416ec Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 19 Oct 2023 15:58:25 +0200 Subject: [PATCH 4/4] Add explanatory comment --- milli/src/search/new/bucket_sort.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index df9c14c7d..b46f6124f 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -60,6 +60,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( let mut all_candidates = universe - excluded; all_candidates.extend(results.iter().copied()); + // drain the results of the skipped elements + // this **must** be done **after** writing the entire results in `all_candidates` to ensure + // e.g. estimatedTotalHits is correct. if results.len() >= from { results.drain(..from); } else {