2494: Introduce the new faceting and pagination settings r=ManyTheFish a=Kerollmops

This PR introduces two new settings following the newly created spec https://github.com/meilisearch/specifications/pull/157:
 - The `faceting.max_values_per_facet` one describes the maximum number of values (each with a count) associated with a value in a facet distribution query.
 - The `pagination.limited_to` one describes the maximum number of documents that a search query can ever return.

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
bors[bot] 2022-06-09 12:09:21 +00:00 committed by GitHub
commit b9b32d65a8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 285 additions and 25 deletions

View file

@ -282,6 +282,50 @@ make_setting_route!(
}
);
make_setting_route!(
"/faceting",
patch,
meilisearch_lib::index::updates::FacetingSettings,
faceting,
"faceting",
analytics,
|setting: &Option<meilisearch_lib::index::updates::FacetingSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"Faceting Updated".to_string(),
json!({
"faceting": {
"max_values_per_facet": setting.as_ref().and_then(|s| s.max_values_per_facet.set()),
},
}),
Some(req),
);
}
);
make_setting_route!(
"/pagination",
patch,
meilisearch_lib::index::updates::PaginationSettings,
pagination,
"pagination",
analytics,
|setting: &Option<meilisearch_lib::index::updates::PaginationSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"Pagination Updated".to_string(),
json!({
"pagination": {
"limited_to": setting.as_ref().and_then(|s| s.limited_to.set()),
},
}),
Some(req),
);
}
);
macro_rules! generate_configure {
($($mod:ident),*) => {
pub fn configure(cfg: &mut web::ServiceConfig) {

View file

@ -61,7 +61,7 @@ async fn import_dump_v2_movie_raw() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }})
json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
);
let (tasks, code) = index.list_tasks().await;
@ -125,7 +125,7 @@ async fn import_dump_v2_movie_with_settings() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }})
json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
);
let (tasks, code) = index.list_tasks().await;
@ -189,7 +189,7 @@ async fn import_dump_v2_rubygems_with_settings() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }})
json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 }})
);
let (tasks, code) = index.list_tasks().await;
@ -253,7 +253,7 @@ async fn import_dump_v3_movie_raw() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }})
json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
);
let (tasks, code) = index.list_tasks().await;
@ -317,7 +317,7 @@ async fn import_dump_v3_movie_with_settings() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }})
json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
);
let (tasks, code) = index.list_tasks().await;
@ -381,7 +381,7 @@ async fn import_dump_v3_rubygems_with_settings() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }})
json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
);
let (tasks, code) = index.list_tasks().await;
@ -445,7 +445,7 @@ async fn import_dump_v4_movie_raw() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({ "displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }})
json!({ "displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
);
let (tasks, code) = index.list_tasks().await;
@ -509,7 +509,7 @@ async fn import_dump_v4_movie_with_settings() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }})
json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
);
let (tasks, code) = index.list_tasks().await;
@ -573,7 +573,7 @@ async fn import_dump_v4_rubygems_with_settings() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({ "displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }})
json!({ "displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
);
let (tasks, code) = index.list_tasks().await;

View file

@ -565,6 +565,36 @@ async fn placeholder_search_is_hard_limited() {
},
)
.await;
index
.update_settings(json!({ "pagination": { "limitedTo": 10_000 } }))
.await;
index.wait_task(1).await;
index
.search(
json!({
"limit": 1500,
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1200);
},
)
.await;
index
.search(
json!({
"offset": 1000,
"limit": 400,
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 200);
},
)
.await;
}
#[actix_rt::test]
@ -604,4 +634,85 @@ async fn search_is_hard_limited() {
},
)
.await;
index
.update_settings(json!({ "pagination": { "limitedTo": 10_000 } }))
.await;
index.wait_task(1).await;
index
.search(
json!({
"q": "unique",
"limit": 1500,
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1200);
},
)
.await;
index
.search(
json!({
"q": "unique",
"offset": 1000,
"limit": 400,
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 200);
},
)
.await;
}
#[actix_rt::test]
async fn faceting_max_values_per_facet() {
let server = Server::new().await;
let index = server.index("test");
index
.update_settings(json!({ "filterableAttributes": ["number"] }))
.await;
let documents: Vec<_> = (0..10_000)
.map(|id| json!({ "id": id, "number": id * 10 }))
.collect();
index.add_documents(json!(documents), None).await;
index.wait_task(1).await;
index
.search(
json!({
"facets": ["number"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
let numbers = response["facetDistribution"]["number"].as_object().unwrap();
assert_eq!(numbers.len(), 100);
},
)
.await;
index
.update_settings(json!({ "faceting": { "maxValuesPerFacet": 10_000 } }))
.await;
index.wait_task(2).await;
index
.search(
json!({
"facets": ["number"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
let numbers = dbg!(&response)["facetDistribution"]["number"]
.as_object()
.unwrap();
assert_eq!(numbers.len(), 10_000);
},
)
.await;
}

View file

@ -24,6 +24,12 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
);
map.insert("stop_words", json!([]));
map.insert("synonyms", json!({}));
map.insert(
"faceting",
json!({
"maxValuesByFacet": json!(100),
}),
);
map
});
@ -43,7 +49,7 @@ async fn get_settings() {
let (response, code) = index.settings().await;
assert_eq!(code, 200);
let settings = response.as_object().unwrap();
assert_eq!(settings.keys().len(), 9);
assert_eq!(settings.keys().len(), 11);
assert_eq!(settings["displayedAttributes"], json!(["*"]));
assert_eq!(settings["searchableAttributes"], json!(["*"]));
assert_eq!(settings["filterableAttributes"], json!([]));
@ -61,6 +67,18 @@ async fn get_settings() {
])
);
assert_eq!(settings["stopWords"], json!([]));
assert_eq!(
settings["faceting"],
json!({
"maxValuesPerFacet": 100,
})
);
assert_eq!(
settings["pagination"],
json!({
"limitedTo": 1000,
})
);
}
#[actix_rt::test]