mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-07 12:04:30 +01:00
Merge #4140
4140: Fix bug where search with distinct attribute has wrong totalHits and totalPages r=dureuill a=vivek-26 # Pull Request ## Related issue Fixes #4130 ## What does this PR do? This PR - - Fixes the bug where search with distinct attribute and pagination (no ranking) returns incorrect values for `totalHits` and `totalPages`. - Add/update unit and integration tests. ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: Vivek Kumar <vivek.26@outlook.com> Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
This commit is contained in:
commit
1032d82643
@ -6,21 +6,109 @@ use crate::common::Server;
|
|||||||
|
|
||||||
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||||
json!([
|
json!([
|
||||||
{"productId": 1, "shopId": 1},
|
{
|
||||||
{"productId": 2, "shopId": 1},
|
"id": 1,
|
||||||
{"productId": 3, "shopId": 2},
|
"description": "Leather Jacket",
|
||||||
{"productId": 4, "shopId": 2},
|
"brand": "Lee Jeans",
|
||||||
{"productId": 5, "shopId": 3},
|
"product_id": "123456",
|
||||||
{"productId": 6, "shopId": 3},
|
"color": "Brown"
|
||||||
{"productId": 7, "shopId": 4},
|
},
|
||||||
{"productId": 8, "shopId": 4},
|
{
|
||||||
{"productId": 9, "shopId": 5},
|
"id": 2,
|
||||||
{"productId": 10, "shopId": 5}
|
"description": "Leather Jacket",
|
||||||
|
"brand": "Lee Jeans",
|
||||||
|
"product_id": "123456",
|
||||||
|
"color": "Black"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"description": "Leather Jacket",
|
||||||
|
"brand": "Lee Jeans",
|
||||||
|
"product_id": "123456",
|
||||||
|
"color": "Blue"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 4,
|
||||||
|
"description": "T-Shirt",
|
||||||
|
"brand": "Nike",
|
||||||
|
"product_id": "789012",
|
||||||
|
"color": "Red"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 5,
|
||||||
|
"description": "T-Shirt",
|
||||||
|
"brand": "Nike",
|
||||||
|
"product_id": "789012",
|
||||||
|
"color": "Blue"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 6,
|
||||||
|
"description": "Running Shoes",
|
||||||
|
"brand": "Adidas",
|
||||||
|
"product_id": "456789",
|
||||||
|
"color": "Black"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 7,
|
||||||
|
"description": "Running Shoes",
|
||||||
|
"brand": "Adidas",
|
||||||
|
"product_id": "456789",
|
||||||
|
"color": "White"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 8,
|
||||||
|
"description": "Hoodie",
|
||||||
|
"brand": "Puma",
|
||||||
|
"product_id": "987654",
|
||||||
|
"color": "Gray"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 9,
|
||||||
|
"description": "Sweater",
|
||||||
|
"brand": "Gap",
|
||||||
|
"product_id": "234567",
|
||||||
|
"color": "Green"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 10,
|
||||||
|
"description": "Sweater",
|
||||||
|
"brand": "Gap",
|
||||||
|
"product_id": "234567",
|
||||||
|
"color": "Red"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 11,
|
||||||
|
"description": "Sweater",
|
||||||
|
"brand": "Gap",
|
||||||
|
"product_id": "234567",
|
||||||
|
"color": "Blue"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 12,
|
||||||
|
"description": "Jeans",
|
||||||
|
"brand": "Levi's",
|
||||||
|
"product_id": "345678",
|
||||||
|
"color": "Indigo"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 13,
|
||||||
|
"description": "Jeans",
|
||||||
|
"brand": "Levi's",
|
||||||
|
"product_id": "345678",
|
||||||
|
"color": "Black"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 14,
|
||||||
|
"description": "Jeans",
|
||||||
|
"brand": "Levi's",
|
||||||
|
"product_id": "345678",
|
||||||
|
"color": "Stone Wash"
|
||||||
|
}
|
||||||
])
|
])
|
||||||
});
|
});
|
||||||
|
|
||||||
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
|
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id";
|
||||||
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";
|
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id";
|
||||||
|
|
||||||
/// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
/// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
@ -33,31 +121,121 @@ async fn distinct_search_with_offset_no_ranking() {
|
|||||||
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
|
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
|
||||||
index.wait_task(1).await;
|
index.wait_task(1).await;
|
||||||
|
|
||||||
fn get_hits(response: Value) -> Vec<i64> {
|
fn get_hits(response: &Value) -> Vec<&str> {
|
||||||
let hits_array = response["hits"].as_array().unwrap();
|
let hits_array = response["hits"].as_array().unwrap();
|
||||||
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
|
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
|
||||||
}
|
}
|
||||||
|
|
||||||
let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
|
let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
|
||||||
let hits = get_hits(response);
|
let hits = get_hits(&response);
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(hits.len(), @"2");
|
snapshot!(hits.len(), @"2");
|
||||||
snapshot!(format!("{:?}", hits), @"[1, 2]");
|
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
|
||||||
|
snapshot!(response["estimatedTotalHits"] , @"11");
|
||||||
|
|
||||||
let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
|
let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
|
||||||
let hits = get_hits(response);
|
let hits = get_hits(&response);
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(hits.len(), @"2");
|
snapshot!(hits.len(), @"2");
|
||||||
snapshot!(format!("{:?}", hits), @"[3, 4]");
|
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
|
||||||
|
snapshot!(response["estimatedTotalHits"], @"10");
|
||||||
|
|
||||||
let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
|
let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
|
||||||
let hits = get_hits(response);
|
let hits = get_hits(&response);
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(hits.len(), @"2");
|
||||||
|
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
|
||||||
|
snapshot!(response["estimatedTotalHits"], @"6");
|
||||||
|
|
||||||
|
let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
|
||||||
|
let hits = get_hits(&response);
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(hits.len(), @"1");
|
snapshot!(hits.len(), @"1");
|
||||||
snapshot!(format!("{:?}", hits), @"[5]");
|
snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
|
||||||
|
snapshot!(response["estimatedTotalHits"], @"6");
|
||||||
|
|
||||||
let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
|
let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
|
||||||
let hits = get_hits(response);
|
let hits = get_hits(&response);
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(hits.len(), @"0");
|
snapshot!(hits.len(), @"0");
|
||||||
|
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||||
|
snapshot!(response["estimatedTotalHits"], @"6");
|
||||||
|
|
||||||
|
let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
|
||||||
|
let hits = get_hits(&response);
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(hits.len(), @"0");
|
||||||
|
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||||
|
snapshot!(response["estimatedTotalHits"], @"6");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// testing: https://github.com/meilisearch/meilisearch/issues/4130
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn distinct_search_with_pagination_no_ranking() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
|
||||||
|
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
|
||||||
|
index.wait_task(1).await;
|
||||||
|
|
||||||
|
fn get_hits(response: &Value) -> Vec<&str> {
|
||||||
|
let hits_array = response["hits"].as_array().unwrap();
|
||||||
|
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
|
||||||
|
}
|
||||||
|
|
||||||
|
let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
|
||||||
|
let hits = get_hits(&response);
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(hits.len(), @"0");
|
||||||
|
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||||
|
snapshot!(response["page"], @"0");
|
||||||
|
snapshot!(response["totalPages"], @"3");
|
||||||
|
snapshot!(response["totalHits"], @"6");
|
||||||
|
|
||||||
|
let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
|
||||||
|
let hits = get_hits(&response);
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(hits.len(), @"2");
|
||||||
|
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
|
||||||
|
snapshot!(response["page"], @"1");
|
||||||
|
snapshot!(response["totalPages"], @"3");
|
||||||
|
snapshot!(response["totalHits"], @"6");
|
||||||
|
|
||||||
|
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
|
||||||
|
let hits = get_hits(&response);
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(hits.len(), @"2");
|
||||||
|
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
|
||||||
|
snapshot!(response["page"], @"2");
|
||||||
|
snapshot!(response["totalPages"], @"3");
|
||||||
|
snapshot!(response["totalHits"], @"6");
|
||||||
|
|
||||||
|
let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
|
||||||
|
let hits = get_hits(&response);
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(hits.len(), @"2");
|
||||||
|
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
|
||||||
|
snapshot!(response["page"], @"3");
|
||||||
|
snapshot!(response["totalPages"], @"3");
|
||||||
|
snapshot!(response["totalHits"], @"6");
|
||||||
|
|
||||||
|
let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
|
||||||
|
let hits = get_hits(&response);
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(hits.len(), @"0");
|
||||||
|
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||||
|
snapshot!(response["page"], @"4");
|
||||||
|
snapshot!(response["totalPages"], @"3");
|
||||||
|
snapshot!(response["totalHits"], @"6");
|
||||||
|
|
||||||
|
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
|
||||||
|
let hits = get_hits(&response);
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(hits.len(), @"3");
|
||||||
|
snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
|
||||||
|
snapshot!(response["page"], @"2");
|
||||||
|
snapshot!(response["totalPages"], @"2");
|
||||||
|
snapshot!(response["totalHits"], @"6");
|
||||||
}
|
}
|
||||||
|
@ -46,9 +46,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
if let Some(distinct_fid) = distinct_fid {
|
if let Some(distinct_fid) = distinct_fid {
|
||||||
let mut excluded = RoaringBitmap::new();
|
let mut excluded = RoaringBitmap::new();
|
||||||
let mut results = vec![];
|
let mut results = vec![];
|
||||||
let mut skip = 0;
|
|
||||||
for docid in universe.iter() {
|
for docid in universe.iter() {
|
||||||
if results.len() >= length {
|
if results.len() >= from + length {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if excluded.contains(docid) {
|
if excluded.contains(docid) {
|
||||||
@ -56,16 +55,19 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
|
distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
|
||||||
skip += 1;
|
|
||||||
if skip <= from {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
results.push(docid);
|
results.push(docid);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut all_candidates = universe - excluded;
|
let mut all_candidates = universe - excluded;
|
||||||
all_candidates.extend(results.iter().copied());
|
all_candidates.extend(results.iter().copied());
|
||||||
|
// drain the results of the skipped elements
|
||||||
|
// this **must** be done **after** writing the entire results in `all_candidates` to ensure
|
||||||
|
// e.g. estimatedTotalHits is correct.
|
||||||
|
if results.len() >= from {
|
||||||
|
results.drain(..from);
|
||||||
|
} else {
|
||||||
|
results.clear();
|
||||||
|
}
|
||||||
|
|
||||||
return Ok(BucketSortOutput {
|
return Ok(BucketSortOutput {
|
||||||
scores: vec![Default::default(); results.len()],
|
scores: vec![Default::default(); results.len()],
|
||||||
|
@ -202,7 +202,7 @@ test_distinct!(
|
|||||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
1,
|
1,
|
||||||
vec![],
|
vec![],
|
||||||
2
|
3
|
||||||
);
|
);
|
||||||
test_distinct!(
|
test_distinct!(
|
||||||
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||||
@ -212,7 +212,7 @@ test_distinct!(
|
|||||||
1,
|
1,
|
||||||
2,
|
2,
|
||||||
vec![],
|
vec![],
|
||||||
1
|
3
|
||||||
);
|
);
|
||||||
test_distinct!(
|
test_distinct!(
|
||||||
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||||
@ -222,7 +222,7 @@ test_distinct!(
|
|||||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
2,
|
2,
|
||||||
vec![],
|
vec![],
|
||||||
5
|
7
|
||||||
);
|
);
|
||||||
test_distinct!(
|
test_distinct!(
|
||||||
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||||
@ -232,5 +232,5 @@ test_distinct!(
|
|||||||
2,
|
2,
|
||||||
4,
|
4,
|
||||||
vec![],
|
vec![],
|
||||||
3
|
7
|
||||||
);
|
);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user