mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-29 08:14:26 +01:00
Merge #4108
4108: Fix bug where search with distinct attribute and no ranking, returns offset+limit hits r=curquiza a=vivek-26 # Pull Request ## Related issue Fixes #4078 ## What does this PR do? This PR - - Fixes bug where search with distinct attribute and no ranking, returns offset+limit hits. - Adds unit and integration tests. ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
This commit is contained in:
commit
f343ef5f2f
63
meilisearch/tests/search/distinct.rs
Normal file
63
meilisearch/tests/search/distinct.rs
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
use meili_snap::snapshot;
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
use serde_json::{json, Value};
|
||||||
|
|
||||||
|
use crate::common::Server;
|
||||||
|
|
||||||
|
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||||
|
json!([
|
||||||
|
{"productId": 1, "shopId": 1},
|
||||||
|
{"productId": 2, "shopId": 1},
|
||||||
|
{"productId": 3, "shopId": 2},
|
||||||
|
{"productId": 4, "shopId": 2},
|
||||||
|
{"productId": 5, "shopId": 3},
|
||||||
|
{"productId": 6, "shopId": 3},
|
||||||
|
{"productId": 7, "shopId": 4},
|
||||||
|
{"productId": 8, "shopId": 4},
|
||||||
|
{"productId": 9, "shopId": 5},
|
||||||
|
{"productId": 10, "shopId": 5}
|
||||||
|
])
|
||||||
|
});
|
||||||
|
|
||||||
|
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
|
||||||
|
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";
|
||||||
|
|
||||||
|
/// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn distinct_search_with_offset_no_ranking() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
|
||||||
|
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
|
||||||
|
index.wait_task(1).await;
|
||||||
|
|
||||||
|
fn get_hits(response: Value) -> Vec<i64> {
|
||||||
|
let hits_array = response["hits"].as_array().unwrap();
|
||||||
|
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
|
||||||
|
}
|
||||||
|
|
||||||
|
let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
|
||||||
|
let hits = get_hits(response);
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(hits.len(), @"2");
|
||||||
|
snapshot!(format!("{:?}", hits), @"[1, 2]");
|
||||||
|
|
||||||
|
let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
|
||||||
|
let hits = get_hits(response);
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(hits.len(), @"2");
|
||||||
|
snapshot!(format!("{:?}", hits), @"[3, 4]");
|
||||||
|
|
||||||
|
let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
|
||||||
|
let hits = get_hits(response);
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(hits.len(), @"1");
|
||||||
|
snapshot!(format!("{:?}", hits), @"[5]");
|
||||||
|
|
||||||
|
let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
|
||||||
|
let hits = get_hits(response);
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(hits.len(), @"0");
|
||||||
|
}
|
@ -1,6 +1,7 @@
|
|||||||
// This modules contains all the test concerning search. Each particular feature of the search
|
// This modules contains all the test concerning search. Each particular feature of the search
|
||||||
// should be tested in its own module to isolate tests and keep the tests readable.
|
// should be tested in its own module to isolate tests and keep the tests readable.
|
||||||
|
|
||||||
|
mod distinct;
|
||||||
mod errors;
|
mod errors;
|
||||||
mod facet_search;
|
mod facet_search;
|
||||||
mod formatted;
|
mod formatted;
|
||||||
|
@ -46,18 +46,27 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
if let Some(distinct_fid) = distinct_fid {
|
if let Some(distinct_fid) = distinct_fid {
|
||||||
let mut excluded = RoaringBitmap::new();
|
let mut excluded = RoaringBitmap::new();
|
||||||
let mut results = vec![];
|
let mut results = vec![];
|
||||||
|
let mut skip = 0;
|
||||||
for docid in universe.iter() {
|
for docid in universe.iter() {
|
||||||
if results.len() >= from + length {
|
if results.len() >= length {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if excluded.contains(docid) {
|
if excluded.contains(docid) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
|
distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
|
||||||
|
skip += 1;
|
||||||
|
if skip <= from {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
results.push(docid);
|
results.push(docid);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut all_candidates = universe - excluded;
|
let mut all_candidates = universe - excluded;
|
||||||
all_candidates.extend(results.iter().copied());
|
all_candidates.extend(results.iter().copied());
|
||||||
|
|
||||||
return Ok(BucketSortOutput {
|
return Ok(BucketSortOutput {
|
||||||
scores: vec![Default::default(); results.len()],
|
scores: vec![Default::default(); results.len()],
|
||||||
docids: results,
|
docids: results,
|
||||||
|
@ -8,7 +8,7 @@ use Criterion::*;
|
|||||||
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
||||||
|
|
||||||
macro_rules! test_distinct {
|
macro_rules! test_distinct {
|
||||||
($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $criteria:expr, $n_res:expr) => {
|
($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $offset:expr, $criteria:expr, $n_res:expr) => {
|
||||||
#[test]
|
#[test]
|
||||||
fn $func() {
|
fn $func() {
|
||||||
let criteria = $criteria;
|
let criteria = $criteria;
|
||||||
@ -27,6 +27,7 @@ macro_rules! test_distinct {
|
|||||||
let mut search = Search::new(&rtxn, &index);
|
let mut search = Search::new(&rtxn, &index);
|
||||||
search.query(search::TEST_QUERY);
|
search.query(search::TEST_QUERY);
|
||||||
search.limit($limit);
|
search.limit($limit);
|
||||||
|
search.offset($offset);
|
||||||
search.exhaustive_number_hits($exhaustive);
|
search.exhaustive_number_hits($exhaustive);
|
||||||
|
|
||||||
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
@ -47,6 +48,7 @@ macro_rules! test_distinct {
|
|||||||
Some(d.id)
|
Some(d.id)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
.skip($offset)
|
||||||
.take($limit)
|
.take($limit)
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
@ -61,6 +63,7 @@ test_distinct!(
|
|||||||
tag,
|
tag,
|
||||||
true,
|
true,
|
||||||
1,
|
1,
|
||||||
|
0,
|
||||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||||
3
|
3
|
||||||
);
|
);
|
||||||
@ -69,6 +72,7 @@ test_distinct!(
|
|||||||
asc_desc_rank,
|
asc_desc_rank,
|
||||||
true,
|
true,
|
||||||
1,
|
1,
|
||||||
|
0,
|
||||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||||
7
|
7
|
||||||
);
|
);
|
||||||
@ -77,6 +81,7 @@ test_distinct!(
|
|||||||
asc_desc_rank,
|
asc_desc_rank,
|
||||||
true,
|
true,
|
||||||
0,
|
0,
|
||||||
|
0,
|
||||||
vec![Desc(S("attribute_rank")), Desc(S("exactness_rank")), Exactness, Typo],
|
vec![Desc(S("attribute_rank")), Desc(S("exactness_rank")), Exactness, Typo],
|
||||||
7
|
7
|
||||||
);
|
);
|
||||||
@ -86,6 +91,7 @@ test_distinct!(
|
|||||||
tag,
|
tag,
|
||||||
false,
|
false,
|
||||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
0,
|
||||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||||
3
|
3
|
||||||
);
|
);
|
||||||
@ -94,6 +100,7 @@ test_distinct!(
|
|||||||
asc_desc_rank,
|
asc_desc_rank,
|
||||||
false,
|
false,
|
||||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
0,
|
||||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||||
7
|
7
|
||||||
);
|
);
|
||||||
@ -102,6 +109,7 @@ test_distinct!(
|
|||||||
tag,
|
tag,
|
||||||
false,
|
false,
|
||||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
0,
|
||||||
vec![Words],
|
vec![Words],
|
||||||
3
|
3
|
||||||
);
|
);
|
||||||
@ -110,6 +118,7 @@ test_distinct!(
|
|||||||
asc_desc_rank,
|
asc_desc_rank,
|
||||||
false,
|
false,
|
||||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
0,
|
||||||
vec![Words],
|
vec![Words],
|
||||||
7
|
7
|
||||||
);
|
);
|
||||||
@ -118,6 +127,7 @@ test_distinct!(
|
|||||||
tag,
|
tag,
|
||||||
false,
|
false,
|
||||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
0,
|
||||||
vec![Words, Typo],
|
vec![Words, Typo],
|
||||||
3
|
3
|
||||||
);
|
);
|
||||||
@ -126,6 +136,7 @@ test_distinct!(
|
|||||||
asc_desc_rank,
|
asc_desc_rank,
|
||||||
false,
|
false,
|
||||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
0,
|
||||||
vec![Words, Typo],
|
vec![Words, Typo],
|
||||||
7
|
7
|
||||||
);
|
);
|
||||||
@ -134,6 +145,7 @@ test_distinct!(
|
|||||||
tag,
|
tag,
|
||||||
false,
|
false,
|
||||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
0,
|
||||||
vec![Words, Proximity],
|
vec![Words, Proximity],
|
||||||
3
|
3
|
||||||
);
|
);
|
||||||
@ -142,6 +154,7 @@ test_distinct!(
|
|||||||
asc_desc_rank,
|
asc_desc_rank,
|
||||||
false,
|
false,
|
||||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
0,
|
||||||
vec![Words, Proximity],
|
vec![Words, Proximity],
|
||||||
7
|
7
|
||||||
);
|
);
|
||||||
@ -150,6 +163,7 @@ test_distinct!(
|
|||||||
tag,
|
tag,
|
||||||
false,
|
false,
|
||||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
0,
|
||||||
vec![Words, Attribute],
|
vec![Words, Attribute],
|
||||||
3
|
3
|
||||||
);
|
);
|
||||||
@ -158,6 +172,7 @@ test_distinct!(
|
|||||||
asc_desc_rank,
|
asc_desc_rank,
|
||||||
false,
|
false,
|
||||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
0,
|
||||||
vec![Words, Attribute],
|
vec![Words, Attribute],
|
||||||
7
|
7
|
||||||
);
|
);
|
||||||
@ -166,6 +181,7 @@ test_distinct!(
|
|||||||
tag,
|
tag,
|
||||||
false,
|
false,
|
||||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
0,
|
||||||
vec![Words, Exactness],
|
vec![Words, Exactness],
|
||||||
3
|
3
|
||||||
);
|
);
|
||||||
@ -174,6 +190,47 @@ test_distinct!(
|
|||||||
asc_desc_rank,
|
asc_desc_rank,
|
||||||
false,
|
false,
|
||||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
0,
|
||||||
vec![Words, Exactness],
|
vec![Words, Exactness],
|
||||||
7
|
7
|
||||||
);
|
);
|
||||||
|
test_distinct!(
|
||||||
|
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||||
|
distinct_string_limit_and_offset,
|
||||||
|
tag,
|
||||||
|
false,
|
||||||
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
1,
|
||||||
|
vec![],
|
||||||
|
2
|
||||||
|
);
|
||||||
|
test_distinct!(
|
||||||
|
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||||
|
exhaustive_distinct_string_limit_and_offset,
|
||||||
|
tag,
|
||||||
|
true,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
vec![],
|
||||||
|
1
|
||||||
|
);
|
||||||
|
test_distinct!(
|
||||||
|
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||||
|
distinct_number_limit_and_offset,
|
||||||
|
asc_desc_rank,
|
||||||
|
false,
|
||||||
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
2,
|
||||||
|
vec![],
|
||||||
|
5
|
||||||
|
);
|
||||||
|
test_distinct!(
|
||||||
|
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||||
|
exhaustive_distinct_number_limit_and_offset,
|
||||||
|
asc_desc_rank,
|
||||||
|
true,
|
||||||
|
2,
|
||||||
|
4,
|
||||||
|
vec![],
|
||||||
|
3
|
||||||
|
);
|
||||||
|
Loading…
Reference in New Issue
Block a user