mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 06:44:27 +01:00
Merge #4746
4746: Fix hybrid search limit offset r=irevoire a=dureuill # Pull Request ## Related issue Fixes #4745 ## What does this PR do? - Apply offset and limit to the keyword search results when they are returned early. - Add a test that is initially failing, and then passes Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
commit
0df84bbba7
@ -150,6 +150,35 @@ async fn simple_search() {
|
|||||||
snapshot!(response["semanticHitCount"], @"3");
|
snapshot!(response["semanticHitCount"], @"3");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn limit_offset() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.search_post(
|
||||||
|
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}}]"###);
|
||||||
|
snapshot!(response["semanticHitCount"], @"0");
|
||||||
|
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
|
||||||
|
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.search_post(
|
||||||
|
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}}]"###);
|
||||||
|
snapshot!(response["semanticHitCount"], @"1");
|
||||||
|
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
|
||||||
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn simple_search_hf() {
|
async fn simple_search_hf() {
|
||||||
let server = Server::new().await;
|
let server = Server::new().await;
|
||||||
|
@ -178,16 +178,16 @@ impl<'a> Search<'a> {
|
|||||||
|
|
||||||
// completely skip semantic search if the results of the keyword search are good enough
|
// completely skip semantic search if the results of the keyword search are good enough
|
||||||
if self.results_good_enough(&keyword_results, semantic_ratio) {
|
if self.results_good_enough(&keyword_results, semantic_ratio) {
|
||||||
return Ok((keyword_results, Some(0)));
|
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
||||||
}
|
}
|
||||||
|
|
||||||
// no vector search against placeholder search
|
// no vector search against placeholder search
|
||||||
let Some(query) = search.query.take() else {
|
let Some(query) = search.query.take() else {
|
||||||
return Ok((keyword_results, Some(0)));
|
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
||||||
};
|
};
|
||||||
// no embedder, no semantic search
|
// no embedder, no semantic search
|
||||||
let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else {
|
let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else {
|
||||||
return Ok((keyword_results, Some(0)));
|
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
||||||
};
|
};
|
||||||
|
|
||||||
let vector_query = match vector {
|
let vector_query = match vector {
|
||||||
@ -239,3 +239,44 @@ impl<'a> Search<'a> {
|
|||||||
true
|
true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn return_keyword_results(
|
||||||
|
limit: usize,
|
||||||
|
offset: usize,
|
||||||
|
SearchResult {
|
||||||
|
matching_words,
|
||||||
|
candidates,
|
||||||
|
mut documents_ids,
|
||||||
|
mut document_scores,
|
||||||
|
degraded,
|
||||||
|
used_negative_operator,
|
||||||
|
}: SearchResult,
|
||||||
|
) -> (SearchResult, Option<u32>) {
|
||||||
|
let (documents_ids, document_scores) = if offset >= documents_ids.len() ||
|
||||||
|
// technically redudant because documents_ids.len() == document_scores.len(),
|
||||||
|
// defensive programming
|
||||||
|
offset >= document_scores.len()
|
||||||
|
{
|
||||||
|
(vec![], vec![])
|
||||||
|
} else {
|
||||||
|
// PANICS: offset < len
|
||||||
|
documents_ids.rotate_left(offset);
|
||||||
|
documents_ids.truncate(limit);
|
||||||
|
|
||||||
|
// PANICS: offset < len
|
||||||
|
document_scores.rotate_left(offset);
|
||||||
|
document_scores.truncate(limit);
|
||||||
|
(documents_ids, document_scores)
|
||||||
|
};
|
||||||
|
(
|
||||||
|
SearchResult {
|
||||||
|
matching_words,
|
||||||
|
candidates,
|
||||||
|
documents_ids,
|
||||||
|
document_scores,
|
||||||
|
degraded,
|
||||||
|
used_negative_operator,
|
||||||
|
},
|
||||||
|
Some(0),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user