2494: Introduce the new faceting and pagination settings r=ManyTheFish a=Kerollmops

This PR introduces two new settings following the newly created spec https://github.com/meilisearch/specifications/pull/157:
 - The `faceting.max_values_per_facet` one describes the maximum number of values (each with a count) returned for each facet in a facet distribution query.
 - The `pagination.limited_to` one describes the maximum number of documents that a search query can ever return.

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
bors[bot] 2022-06-09 12:09:21 +00:00 committed by GitHub
commit b9b32d65a8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 285 additions and 25 deletions

View file

@ -8,17 +8,18 @@ use std::sync::Arc;
use fst::IntoStreamer;
use milli::heed::{EnvOpenOptions, RoTxn};
use milli::update::{IndexerConfig, Setting};
use milli::{obkv_to_json, FieldDistribution};
use milli::{obkv_to_json, FieldDistribution, DEFAULT_VALUES_PER_FACET};
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use time::OffsetDateTime;
use uuid::Uuid;
use crate::index::search::DEFAULT_PAGINATION_LIMITED_TO;
use crate::EnvSizer;
use super::error::IndexError;
use super::error::Result;
use super::updates::{MinWordSizeTyposSetting, TypoSettings};
use super::updates::{FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, TypoSettings};
use super::{Checked, Settings};
pub type Document = Map<String, Value>;
@ -193,6 +194,20 @@ impl Index {
disable_on_attributes: Setting::Set(disabled_attributes),
};
let faceting = FacetingSettings {
max_values_per_facet: Setting::Set(
self.max_values_per_facet(txn)?
.unwrap_or(DEFAULT_VALUES_PER_FACET),
),
};
let pagination = PaginationSettings {
limited_to: Setting::Set(
self.pagination_limited_to(txn)?
.unwrap_or(DEFAULT_PAGINATION_LIMITED_TO),
),
};
Ok(Settings {
displayed_attributes: match displayed_attributes {
Some(attrs) => Setting::Set(attrs),
@ -212,6 +227,8 @@ impl Index {
},
synonyms: Setting::Set(synonyms),
typo_tolerance: Setting::Set(typo_tolerance),
faceting: Setting::Set(faceting),
pagination: Setting::Set(pagination),
_kind: PhantomData,
})
}

View file

@ -7,6 +7,7 @@ use either::Either;
use milli::tokenizer::TokenizerBuilder;
use milli::{
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder, SortError,
DEFAULT_VALUES_PER_FACET,
};
use regex::Regex;
use serde::{Deserialize, Serialize};
@ -28,7 +29,7 @@ pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
/// The maximum number of results that the engine
/// will be able to return in one search call.
pub const HARD_RESULT_LIMIT: usize = 1000;
pub const DEFAULT_PAGINATION_LIMITED_TO: usize = 1000;
#[derive(Deserialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
@ -90,10 +91,14 @@ impl Index {
search.query(query);
}
let pagination_limited_to = self
.pagination_limited_to(&rtxn)?
.unwrap_or(DEFAULT_PAGINATION_LIMITED_TO);
// Make sure that a user can't get more documents than the hard limit,
// we align that on the offset too.
let offset = min(query.offset.unwrap_or(0), HARD_RESULT_LIMIT);
let limit = min(query.limit, HARD_RESULT_LIMIT.saturating_sub(offset));
let offset = min(query.offset.unwrap_or(0), pagination_limited_to);
let limit = min(query.limit, pagination_limited_to.saturating_sub(offset));
search.offset(offset);
search.limit(limit);
@ -223,6 +228,12 @@ impl Index {
let facet_distribution = match query.facets {
Some(ref fields) => {
let mut facet_distribution = self.facets_distribution(&rtxn);
let max_values_by_facet = self
.max_values_per_facet(&rtxn)?
.unwrap_or(DEFAULT_VALUES_PER_FACET);
facet_distribution.max_values_per_facet(max_values_by_facet);
if fields.iter().all(|f| f != "*") {
facet_distribution.facets(fields);
}

View file

@ -68,6 +68,27 @@ pub struct TypoSettings {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub disable_on_attributes: Setting<BTreeSet<String>>,
}
/// Faceting-related settings of an index.
///
/// Keys are (de)serialized in camelCase and unknown keys are rejected when
/// deserializing.
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct FacetingSettings {
/// Maximum number of values (each with a count) returned for a facet in a
/// facet distribution query. Omitted from the serialized output when not set.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
pub max_values_per_facet: Setting<usize>,
}
/// Pagination-related settings of an index.
///
/// Keys are (de)serialized in camelCase and unknown keys are rejected when
/// deserializing.
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct PaginationSettings {
/// Maximum number of documents that a search query can ever return.
/// Omitted from the serialized output when not set.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
pub limited_to: Setting<usize>,
}
/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
@ -114,6 +135,12 @@ pub struct Settings<T> {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
pub typo_tolerance: Setting<TypoSettings>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
pub faceting: Setting<FacetingSettings>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
pub pagination: Setting<PaginationSettings>,
#[serde(skip)]
pub _kind: PhantomData<T>,
@ -131,6 +158,8 @@ impl Settings<Checked> {
synonyms: Setting::Reset,
distinct_attribute: Setting::Reset,
typo_tolerance: Setting::Reset,
faceting: Setting::Reset,
pagination: Setting::Reset,
_kind: PhantomData,
}
}
@ -146,6 +175,8 @@ impl Settings<Checked> {
synonyms,
distinct_attribute,
typo_tolerance,
faceting,
pagination,
..
} = self;
@ -159,6 +190,8 @@ impl Settings<Checked> {
synonyms,
distinct_attribute,
typo_tolerance,
faceting,
pagination,
_kind: PhantomData,
}
}
@ -198,6 +231,8 @@ impl Settings<Unchecked> {
synonyms: self.synonyms,
distinct_attribute: self.distinct_attribute,
typo_tolerance: self.typo_tolerance,
faceting: self.faceting,
pagination: self.pagination,
_kind: PhantomData,
}
}
@ -427,6 +462,26 @@ pub fn apply_settings_to_builder(
}
Setting::NotSet => (),
}
match settings.faceting {
Setting::Set(ref value) => match value.max_values_per_facet {
Setting::Set(val) => builder.set_max_values_per_facet(val),
Setting::Reset => builder.reset_max_values_per_facet(),
Setting::NotSet => (),
},
Setting::Reset => builder.reset_max_values_per_facet(),
Setting::NotSet => (),
}
match settings.pagination {
Setting::Set(ref value) => match value.limited_to {
Setting::Set(val) => builder.set_pagination_limited_to(val),
Setting::Reset => builder.reset_pagination_limited_to(),
Setting::NotSet => (),
},
Setting::Reset => builder.reset_pagination_limited_to(),
Setting::NotSet => (),
}
}
#[cfg(test)]
@ -456,6 +511,8 @@ pub(crate) mod test {
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
typo_tolerance: Setting::NotSet,
faceting: Setting::NotSet,
pagination: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};
@ -478,6 +535,8 @@ pub(crate) mod test {
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
typo_tolerance: Setting::NotSet,
faceting: Setting::NotSet,
pagination: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};