2601: Ease search result pagination r=Kerollmops a=ManyTheFish

# Summary
This PR is a prototype enhancing search results pagination (#2577)

# Todo

- [x] Update the API to return the number of pages and allow users to directly choose a page instead of computing an offset
- [x] Change computation of `total_pages` in order to have an exact count
  - [x] compute query tree exhaustively
  - [x] compute distinct exhaustively

# Small Documentation

## Default search query

**request**:
```sh
curl \
  -X POST 'http://localhost:7700/indexes/movies/search' \
  -H 'Content-Type: application/json' \
  --data-binary '{ "q": "botman" }'
```

**result**:
```json
{
  "hits":[...],
  "query":"botman",
  "processingTimeMs":5,
  "hitsPerPage":20,
  "page":1,
  "totalPages":4,
  "totalHits":66
}
```

## Search query with offset parameter

**request**:
```sh
curl \
  -X POST 'http://localhost:7700/indexes/movies/search' \
  -H 'Content-Type: application/json' \
  --data-binary '{ "q": "botman", "offset": 0 }'
```

**result**:
```json
{
  "hits":[...],
  "query":"botman",
  "processingTimeMs":3,
  "limit":20,
  "offset":0,
  "estimatedTotalHits":66
}
```

## Search query selecting page with page parameter

**request**:
```sh
curl \
  -X POST 'http://localhost:7700/indexes/movies/search' \
  -H 'Content-Type: application/json' \
  --data-binary '{ "q": "botman", "page": 2 }'
```

**result**:
```json
{
  "hits":[...],
  "query":"botman",
  "processingTimeMs":5,
  "hitsPerPage":20,
  "page":2,
  "totalPages":4,
  "totalHits":66
}
```

# Related

fixes #2577

## In charge of the feature

Core: `@ManyTheFish` 
Docs: `@guimachiavelli` 
Integration: `@bidoubiwa` 


Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
bors[bot] 2022-10-26 16:10:58 +00:00 committed by GitHub
commit 25ec51e783
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 247 additions and 52 deletions

View file

@ -10,7 +10,7 @@ use http::header::CONTENT_TYPE;
use meilisearch_auth::SearchRules;
use meilisearch_lib::index::{
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
};
use meilisearch_lib::index_controller::Stats;
use meilisearch_lib::MeiliSearch;
@ -373,6 +373,7 @@ pub struct SearchAggregator {
// pagination
max_limit: usize,
max_offset: usize,
finite_pagination: usize,
// formatting
highlight_pre_tag: bool,
@ -427,12 +428,20 @@ impl SearchAggregator {
ret.max_terms_number = q.split_whitespace().count();
}
if query.is_finite_pagination() {
let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
ret.max_limit = limit;
ret.max_offset = query.page.unwrap_or(1).saturating_sub(1) * limit;
ret.finite_pagination = 1;
} else {
ret.max_limit = query.limit;
ret.max_offset = query.offset;
ret.finite_pagination = 0;
}
ret.matching_strategy
.insert(format!("{:?}", query.matching_strategy), 1);
ret.max_limit = query.limit;
ret.max_offset = query.offset.unwrap_or_default();
ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
ret.crop_marker = query.crop_marker != DEFAULT_CROP_MARKER();
@ -491,6 +500,7 @@ impl SearchAggregator {
// pagination
self.max_limit = self.max_limit.max(other.max_limit);
self.max_offset = self.max_offset.max(other.max_offset);
self.finite_pagination += other.finite_pagination;
self.highlight_pre_tag |= other.highlight_pre_tag;
self.highlight_post_tag |= other.highlight_post_tag;
@ -534,6 +544,7 @@ impl SearchAggregator {
"pagination": {
"max_limit": self.max_limit,
"max_offset": self.max_offset,
"finite_pagination": self.finite_pagination > self.total_received / 2,
},
"formatting": {
"highlight_pre_tag": self.highlight_pre_tag,

View file

@ -4,6 +4,7 @@ use meilisearch_auth::IndexSearchRules;
use meilisearch_lib::index::{
MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET,
};
use meilisearch_lib::MeiliSearch;
use meilisearch_types::error::ResponseError;
@ -27,8 +28,12 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct SearchQueryGet {
q: Option<String>,
offset: Option<usize>,
limit: Option<usize>,
#[serde(default = "DEFAULT_SEARCH_OFFSET")]
offset: usize,
#[serde(default = "DEFAULT_SEARCH_LIMIT")]
limit: usize,
page: Option<usize>,
hits_per_page: Option<usize>,
attributes_to_retrieve: Option<CS<String>>,
attributes_to_crop: Option<CS<String>>,
#[serde(default = "DEFAULT_CROP_LENGTH")]
@ -62,7 +67,9 @@ impl From<SearchQueryGet> for SearchQuery {
Self {
q: other.q,
offset: other.offset,
limit: other.limit.unwrap_or_else(DEFAULT_SEARCH_LIMIT),
limit: other.limit,
page: other.page,
hits_per_page: other.hits_per_page,
attributes_to_retrieve: other
.attributes_to_retrieve
.map(|o| o.into_iter().collect()),

View file

@ -3,6 +3,7 @@
mod errors;
mod formatted;
mod pagination;
use crate::common::Server;
use once_cell::sync::Lazy;

View file

@ -0,0 +1,112 @@
use crate::common::Server;
use crate::search::DOCUMENTS;
use serde_json::json;
#[actix_rt::test]
async fn default_search_should_return_estimated_total_hit() {
let server = Server::new().await;
let index = server.index("basic");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
index
.search(json!({}), |response, code| {
assert_eq!(code, 200, "{}", response);
assert!(response.get("estimatedTotalHits").is_some());
assert!(response.get("limit").is_some());
assert!(response.get("offset").is_some());
// these fields shouldn't be present
assert!(response.get("totalHits").is_none());
assert!(response.get("page").is_none());
assert!(response.get("totalPages").is_none());
})
.await;
}
#[actix_rt::test]
async fn simple_search() {
let server = Server::new().await;
let index = server.index("basic");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
index
.search(json!({"page": 1}), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 5);
assert!(response.get("totalHits").is_some());
assert_eq!(response["page"], 1);
assert_eq!(response["totalPages"], 1);
// these fields shouldn't be present
assert!(response.get("estimatedTotalHits").is_none());
assert!(response.get("limit").is_none());
assert!(response.get("offset").is_none());
})
.await;
}
#[actix_rt::test]
async fn page_zero_should_not_return_any_result() {
let server = Server::new().await;
let index = server.index("basic");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
index
.search(json!({"page": 0}), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 0);
assert!(response.get("totalHits").is_some());
assert_eq!(response["page"], 0);
assert_eq!(response["totalPages"], 1);
})
.await;
}
#[actix_rt::test]
async fn hits_per_page_1() {
let server = Server::new().await;
let index = server.index("basic");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
index
.search(json!({"hitsPerPage": 1}), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
assert_eq!(response["totalHits"], 5);
assert_eq!(response["page"], 1);
assert_eq!(response["totalPages"], 5);
})
.await;
}
#[actix_rt::test]
async fn hits_per_page_0_should_not_return_any_result() {
let server = Server::new().await;
let index = server.index("basic");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
index
.search(json!({"hitsPerPage": 0}), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 0);
assert_eq!(response["totalHits"], 5);
assert_eq!(response["page"], 1);
assert_eq!(response["totalPages"], 0);
})
.await;
}