2281: Hard limit the number of results returned by a search r=Kerollmops a=Kerollmops

This PR fixes #2133 by hard-limiting the number of results that a search request can return at any time. I would like guidance from `@MarinPostma` on how to test this: should I use a mocking test here, or something else?

I talked with `@qdequele` about touching the _nb_hits_ value, and we concluded that it would not be correct to do so.

Could you please confirm that this is the right place to make this change?
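
For clarity, here is a minimal, self-contained sketch of the clamping rule this PR introduces. `clamp_window` is a hypothetical helper for illustration only, not the engine code; the constant and the `min`/`saturating_sub` logic mirror the diff below, and the numbers match the new tests:

```rust
use std::cmp::min;

const HARD_RESULT_LIMIT: usize = 1000;

/// Clamp a requested (offset, limit) window so that
/// offset + limit can never exceed the hard limit.
fn clamp_window(offset: usize, limit: usize) -> (usize, usize) {
    let offset = min(offset, HARD_RESULT_LIMIT);
    let limit = min(limit, HARD_RESULT_LIMIT.saturating_sub(offset));
    (offset, limit)
}

fn main() {
    // A bare limit above the cap is reduced to 1000.
    assert_eq!(clamp_window(0, 1500), (0, 1000));
    // Past offset 800, only 200 slots remain under the cap.
    assert_eq!(clamp_window(800, 400), (800, 200));
    // An offset at or past the cap yields an empty window.
    assert_eq!(clamp_window(1200, 20), (1000, 0));
}
```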

Co-authored-by: Kerollmops <clement@meilisearch.com>
Merged as commit 09a72cee03 by bors[bot] on 2022-04-04 17:19:05 +00:00, committed by GitHub.
2 changed files with 88 additions and 2 deletions

@@ -267,3 +267,79 @@ async fn displayed_attributes() {
     assert_eq!(code, 200, "{}", response);
     assert!(response["hits"].get("title").is_none());
 }
+
+#[actix_rt::test]
+async fn placeholder_search_is_hard_limited() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents: Vec<_> = (0..1200)
+        .map(|i| json!({ "id": i, "text": "I am unique!" }))
+        .collect();
+    index.add_documents(documents.into(), None).await;
+    index.wait_task(0).await;
+
+    index
+        .search(
+            json!({
+                "limit": 1500,
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                assert_eq!(response["hits"].as_array().unwrap().len(), 1000);
+            },
+        )
+        .await;
+
+    index
+        .search(
+            json!({
+                "offset": 800,
+                "limit": 400,
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                assert_eq!(response["hits"].as_array().unwrap().len(), 200);
+            },
+        )
+        .await;
+}
+
+#[actix_rt::test]
+async fn search_is_hard_limited() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents: Vec<_> = (0..1200)
+        .map(|i| json!({ "id": i, "text": "I am unique!" }))
+        .collect();
+    index.add_documents(documents.into(), None).await;
+    index.wait_task(0).await;
+
+    index
+        .search(
+            json!({
+                "q": "unique",
+                "limit": 1500,
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                assert_eq!(response["hits"].as_array().unwrap().len(), 1000);
+            },
+        )
+        .await;
+
+    index
+        .search(
+            json!({
+                "q": "unique",
+                "offset": 800,
+                "limit": 400,
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                assert_eq!(response["hits"].as_array().unwrap().len(), 200);
+            },
+        )
+        .await;
+}
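
Note the expectation of 200 hits in the second search of each test: the offset of 800 is within the hard limit, so it is kept as-is, but the requested limit of 400 is clamped to the 200 slots that remain under `HARD_RESULT_LIMIT`. The clamping itself lives in the search implementation below.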

@@ -1,3 +1,4 @@
+use std::cmp::min;
 use std::collections::{BTreeMap, BTreeSet, HashSet};
 use std::str::FromStr;
 use std::time::Instant;
@@ -34,6 +35,10 @@ pub const fn default_crop_length() -> usize {
     DEFAULT_CROP_LENGTH
 }
 
+/// The maximum number of results that the engine
+/// will be able to return in one search call.
+pub const HARD_RESULT_LIMIT: usize = 1000;
+
 #[derive(Deserialize, Debug, Clone, PartialEq)]
 #[serde(rename_all = "camelCase", deny_unknown_fields)]
 pub struct SearchQuery {
@@ -97,8 +102,13 @@ impl Index {
             search.query(query);
         }
 
-        search.limit(query.limit);
-        search.offset(query.offset.unwrap_or_default());
+        // Make sure that a user can't get more documents than the hard limit;
+        // we align the offset on that limit too.
+        let offset = min(query.offset.unwrap_or(0), HARD_RESULT_LIMIT);
+        let limit = min(query.limit, HARD_RESULT_LIMIT.saturating_sub(offset));
+
+        search.offset(offset);
+        search.limit(limit);
 
         if let Some(ref filter) = query.filter {
             if let Some(facets) = parse_filter(filter)? {