Merge #2601

2601: Ease search result pagination r=Kerollmops a=ManyTheFish # Summary This PR is a prototype enhancing search results pagination (#2577) # Todo - [x] Update the API to return the number of pages and allow users to directly choose a page instead of computing an offset - [x] Change computation of `total_pages` in order to have an exact count - [x] compute query tree exhaustively - [x] compute distinct exhaustively # Small Documentation ## Default search query **request**: ```sh curl \ -X POST 'http://localhost:7700/indexes/movies/search' \ -H 'Content-Type: application/json' \ --data-binary '{ "q": "botman" }' ``` **result**: ```json { "hits":[...], "query":"botman", "processingTimeMs":5, "hitsPerPage":20, "page":1, "totalPages":4, "totalHits":66 } ``` ## Search query with offset parameter **request**: ```sh curl \ -X POST 'http://localhost:7700/indexes/movies/search' \ -H 'Content-Type: application/json' \ --data-binary '{ "q": "botman", "offset": 0 }' ``` **result**: ```json { "hits":[...], "query":"botman", "processingTimeMs":3, "limit":20, "offset":0, "estimatedTotalHits":66 } ``` ## Search query selecting page with page parameter **request**: ```sh curl \ -X POST 'http://localhost:7700/indexes/movies/search' \ -H 'Content-Type: application/json' \ --data-binary '{ "q": "botman", "page": 2 }' ``` **result**: ```json { "hits":[...], "query":"botman", "processingTimeMs":5, "hitsPerPage":20, "page":2, "totalPages":4, "totalHits":66 } ``` # Related fixes #2577 ## In charge of the feature Core: `@ManyTheFish` Docs: `@guimachiavelli` Integration: `@bidoubiwa` Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-07-03 11:57:07 +02:00 · 2022-10-26 16:10:58 +00:00 · 2022-10-26 16:10:58 +00:00 · 25ec51e783
commit 25ec51e783
parent 9aef1031ca f4021273b8
13 changed files with 247 additions and 52 deletions
--- a/meilisearch-http/src/analytics/segment_analytics.rs
+++ b/meilisearch-http/src/analytics/segment_analytics.rs
@ -10,7 +10,7 @@ use http::header::CONTENT_TYPE;
 use meilisearch_auth::SearchRules;
 use meilisearch_lib::index::{
    SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
-    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
+    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
 };
 use meilisearch_lib::index_controller::Stats;
 use meilisearch_lib::MeiliSearch;
@ -373,6 +373,7 @@ pub struct SearchAggregator {
    // pagination
    max_limit: usize,
    max_offset: usize,
+    finite_pagination: usize,

    // formatting
    highlight_pre_tag: bool,
@ -427,12 +428,20 @@ impl SearchAggregator {
            ret.max_terms_number = q.split_whitespace().count();
        }

+        if query.is_finite_pagination() {
+            let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
+            ret.max_limit = limit;
+            ret.max_offset = query.page.unwrap_or(1).saturating_sub(1) * limit;
+            ret.finite_pagination = 1;
+        } else {
+            ret.max_limit = query.limit;
+            ret.max_offset = query.offset;
+            ret.finite_pagination = 0;
+        }
+
        ret.matching_strategy
            .insert(format!("{:?}", query.matching_strategy), 1);

-        ret.max_limit = query.limit;
-        ret.max_offset = query.offset.unwrap_or_default();
-
        ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
        ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
        ret.crop_marker = query.crop_marker != DEFAULT_CROP_MARKER();
@ -491,6 +500,7 @@ impl SearchAggregator {
        // pagination
        self.max_limit = self.max_limit.max(other.max_limit);
        self.max_offset = self.max_offset.max(other.max_offset);
+        self.finite_pagination += other.finite_pagination;

        self.highlight_pre_tag |= other.highlight_pre_tag;
        self.highlight_post_tag |= other.highlight_post_tag;
@ -534,6 +544,7 @@ impl SearchAggregator {
                "pagination": {
                   "max_limit": self.max_limit,
                   "max_offset": self.max_offset,
+                   "finite_pagination": self.finite_pagination > self.total_received / 2,
                },
                "formatting": {
                    "highlight_pre_tag": self.highlight_pre_tag,
--- a/meilisearch-http/src/routes/indexes/search.rs
+++ b/meilisearch-http/src/routes/indexes/search.rs
@ -4,6 +4,7 @@ use meilisearch_auth::IndexSearchRules;
 use meilisearch_lib::index::{
    MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
+    DEFAULT_SEARCH_OFFSET,
 };
 use meilisearch_lib::MeiliSearch;
 use meilisearch_types::error::ResponseError;
@ -27,8 +28,12 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
 #[serde(rename_all = "camelCase", deny_unknown_fields)]
 pub struct SearchQueryGet {
    q: Option<String>,
-    offset: Option<usize>,
-    limit: Option<usize>,
+    #[serde(default = "DEFAULT_SEARCH_OFFSET")]
+    offset: usize,
+    #[serde(default = "DEFAULT_SEARCH_LIMIT")]
+    limit: usize,
+    page: Option<usize>,
+    hits_per_page: Option<usize>,
    attributes_to_retrieve: Option<CS<String>>,
    attributes_to_crop: Option<CS<String>>,
    #[serde(default = "DEFAULT_CROP_LENGTH")]
@ -62,7 +67,9 @@ impl From<SearchQueryGet> for SearchQuery {
        Self {
            q: other.q,
            offset: other.offset,
-            limit: other.limit.unwrap_or_else(DEFAULT_SEARCH_LIMIT),
+            limit: other.limit,
+            page: other.page,
+            hits_per_page: other.hits_per_page,
            attributes_to_retrieve: other
                .attributes_to_retrieve
                .map(|o| o.into_iter().collect()),
--- a/meilisearch-http/tests/search/mod.rs
+++ b/meilisearch-http/tests/search/mod.rs
@ -3,6 +3,7 @@

 mod errors;
 mod formatted;
+mod pagination;

 use crate::common::Server;
 use once_cell::sync::Lazy;
--- a/meilisearch-http/tests/search/pagination.rs
+++ b/meilisearch-http/tests/search/pagination.rs
@ -0,0 +1,112 @@
+use crate::common::Server;
+use crate::search::DOCUMENTS;
+use serde_json::json;
+
+#[actix_rt::test]
+async fn default_search_should_return_estimated_total_hit() { // search sent without `page` or `hitsPerPage`
+    let server = Server::new().await;
+    let index = server.index("basic");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await; // wait for task 0 before searching (presumably the document addition above)
+
+    index
+        .search(json!({}), |response, code| { // empty search body
+            assert_eq!(code, 200, "{}", response); // the response is used as the failure message
+            assert!(response.get("estimatedTotalHits").is_some());
+            assert!(response.get("limit").is_some());
+            assert!(response.get("offset").is_some());
+
+            // the page-based fields shouldn't be present in this response
+            assert!(response.get("totalHits").is_none());
+            assert!(response.get("page").is_none());
+            assert!(response.get("totalPages").is_none());
+        })
+        .await;
+}
+
+#[actix_rt::test]
+async fn simple_search() { // search sent with `page: 1` only
+    let server = Server::new().await;
+    let index = server.index("basic");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await; // wait for task 0 before searching (presumably the document addition above)
+
+    index
+        .search(json!({"page": 1}), |response, code| {
+            assert_eq!(code, 200, "{}", response); // the response is used as the failure message
+            assert_eq!(response["hits"].as_array().unwrap().len(), 5); // five hits are expected on this page
+            assert!(response.get("totalHits").is_some());
+            assert_eq!(response["page"], 1); // the requested page is expected back in the response
+            assert_eq!(response["totalPages"], 1);
+
+            // the `estimatedTotalHits`/`limit`/`offset` fields shouldn't be present in this response
+            assert!(response.get("estimatedTotalHits").is_none());
+            assert!(response.get("limit").is_none());
+            assert!(response.get("offset").is_none());
+        })
+        .await;
+}
+
+#[actix_rt::test]
+async fn page_zero_should_not_return_any_result() { // search sent with `page: 0`
+    let server = Server::new().await;
+    let index = server.index("basic");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await; // wait for task 0 before searching (presumably the document addition above)
+
+    index
+        .search(json!({"page": 0}), |response, code| {
+            assert_eq!(code, 200, "{}", response); // page 0 is still expected to answer 200
+            assert_eq!(response["hits"].as_array().unwrap().len(), 0); // no hit is expected on page 0
+            assert!(response.get("totalHits").is_some());
+            assert_eq!(response["page"], 0); // the requested page is expected back unchanged
+            assert_eq!(response["totalPages"], 1); // the page count is still expected to be reported
+        })
+        .await;
+}
+
+#[actix_rt::test]
+async fn hits_per_page_1() { // search sent with `hitsPerPage: 1` and no `page`
+    let server = Server::new().await;
+    let index = server.index("basic");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await; // wait for task 0 before searching (presumably the document addition above)
+
+    index
+        .search(json!({"hitsPerPage": 1}), |response, code| {
+            assert_eq!(code, 200, "{}", response); // the response is used as the failure message
+            assert_eq!(response["hits"].as_array().unwrap().len(), 1); // exactly one hit is expected
+            assert_eq!(response["totalHits"], 5);
+            assert_eq!(response["page"], 1); // `page` was not sent; 1 is expected in the response
+            assert_eq!(response["totalPages"], 5); // 5 total hits at 1 hit per page
+        })
+        .await;
+}
+
+#[actix_rt::test]
+async fn hits_per_page_0_should_not_return_any_result() { // search sent with `hitsPerPage: 0` and no `page`
+    let server = Server::new().await;
+    let index = server.index("basic");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await; // wait for task 0 before searching (presumably the document addition above)
+
+    index
+        .search(json!({"hitsPerPage": 0}), |response, code| {
+            assert_eq!(code, 200, "{}", response); // a zero page size is still expected to answer 200
+            assert_eq!(response["hits"].as_array().unwrap().len(), 0); // no hit is expected
+            assert_eq!(response["totalHits"], 5); // the total hit count is still expected to be reported
+            assert_eq!(response["page"], 1); // `page` was not sent; 1 is expected in the response
+            assert_eq!(response["totalPages"], 0); // zero pages are expected when the page size is zero
+        })
+        .await;
+}