Add a setting for the search cutoff

This commit is contained in:
Tamo 2024-03-11 18:24:21 +01:00
parent 4a467739cd
commit d1db495119
11 changed files with 169 additions and 35 deletions

View File

@ -277,6 +277,7 @@ pub(crate) mod test {
}),
pagination: Setting::NotSet,
embedders: Setting::NotSet,
search_cutoff: Setting::NotSet,
_kind: std::marker::PhantomData,
};
settings.check()

View File

@ -379,6 +379,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
v5::Setting::NotSet => v6::Setting::NotSet,
},
embedders: v6::Setting::NotSet,
search_cutoff: v6::Setting::NotSet,
_kind: std::marker::PhantomData,
}
}

View File

@ -259,6 +259,7 @@ InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSearchCutoff , InvalidRequest , BAD_REQUEST ;
InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ;
InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;

View File

@ -202,6 +202,9 @@ pub struct Settings<T> {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsEmbedders>)]
pub embedders: Setting<BTreeMap<String, Setting<milli::vector::settings::EmbeddingSettings>>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSearchCutoff>)]
pub search_cutoff: Setting<u64>,
#[serde(skip)]
#[deserr(skip)]
@ -227,6 +230,7 @@ impl Settings<Checked> {
faceting: Setting::Reset,
pagination: Setting::Reset,
embedders: Setting::Reset,
search_cutoff: Setting::Reset,
_kind: PhantomData,
}
}
@ -249,6 +253,7 @@ impl Settings<Checked> {
faceting,
pagination,
embedders,
search_cutoff,
..
} = self;
@ -269,6 +274,7 @@ impl Settings<Checked> {
faceting,
pagination,
embedders,
search_cutoff,
_kind: PhantomData,
}
}
@ -315,6 +321,7 @@ impl Settings<Unchecked> {
faceting: self.faceting,
pagination: self.pagination,
embedders: self.embedders,
search_cutoff: self.search_cutoff,
_kind: PhantomData,
}
}
@ -347,19 +354,40 @@ pub fn apply_settings_to_builder(
settings: &Settings<Checked>,
builder: &mut milli::update::Settings,
) {
match settings.searchable_attributes {
let Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
ranking_rules,
stop_words,
non_separator_tokens,
separator_tokens,
dictionary,
synonyms,
distinct_attribute,
proximity_precision,
typo_tolerance,
faceting,
pagination,
embedders,
search_cutoff,
_kind,
} = settings;
match searchable_attributes {
Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
Setting::Reset => builder.reset_searchable_fields(),
Setting::NotSet => (),
}
match settings.displayed_attributes {
match displayed_attributes {
Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
Setting::Reset => builder.reset_displayed_fields(),
Setting::NotSet => (),
}
match settings.filterable_attributes {
match filterable_attributes {
Setting::Set(ref facets) => {
builder.set_filterable_fields(facets.clone().into_iter().collect())
}
@ -367,13 +395,13 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match settings.sortable_attributes {
match sortable_attributes {
Setting::Set(ref fields) => builder.set_sortable_fields(fields.iter().cloned().collect()),
Setting::Reset => builder.reset_sortable_fields(),
Setting::NotSet => (),
}
match settings.ranking_rules {
match ranking_rules {
Setting::Set(ref criteria) => {
builder.set_criteria(criteria.iter().map(|c| c.clone().into()).collect())
}
@ -381,13 +409,13 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match settings.stop_words {
match stop_words {
Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
Setting::Reset => builder.reset_stop_words(),
Setting::NotSet => (),
}
match settings.non_separator_tokens {
match non_separator_tokens {
Setting::Set(ref non_separator_tokens) => {
builder.set_non_separator_tokens(non_separator_tokens.clone())
}
@ -395,7 +423,7 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match settings.separator_tokens {
match separator_tokens {
Setting::Set(ref separator_tokens) => {
builder.set_separator_tokens(separator_tokens.clone())
}
@ -403,31 +431,31 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match settings.dictionary {
match dictionary {
Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()),
Setting::Reset => builder.reset_dictionary(),
Setting::NotSet => (),
}
match settings.synonyms {
match synonyms {
Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
Setting::Reset => builder.reset_synonyms(),
Setting::NotSet => (),
}
match settings.distinct_attribute {
match distinct_attribute {
Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
Setting::Reset => builder.reset_distinct_field(),
Setting::NotSet => (),
}
match settings.proximity_precision {
match proximity_precision {
Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()),
Setting::Reset => builder.reset_proximity_precision(),
Setting::NotSet => (),
}
match settings.typo_tolerance {
match typo_tolerance {
Setting::Set(ref value) => {
match value.enabled {
Setting::Set(val) => builder.set_autorize_typos(val),
@ -482,7 +510,7 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match &settings.faceting {
match faceting {
Setting::Set(FacetingSettings { max_values_per_facet, sort_facet_values_by }) => {
match max_values_per_facet {
Setting::Set(val) => builder.set_max_values_per_facet(*val),
@ -504,7 +532,7 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match settings.pagination {
match pagination {
Setting::Set(ref value) => match value.max_total_hits {
Setting::Set(val) => builder.set_pagination_max_total_hits(val),
Setting::Reset => builder.reset_pagination_max_total_hits(),
@ -514,11 +542,17 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match settings.embedders.clone() {
Setting::Set(value) => builder.set_embedder_settings(value),
match embedders {
Setting::Set(value) => builder.set_embedder_settings(value.clone()),
Setting::Reset => builder.reset_embedder_settings(),
Setting::NotSet => (),
}
match search_cutoff {
Setting::Set(cutoff) => builder.set_search_cutoff(*cutoff),
Setting::Reset => builder.reset_search_cutoff(),
Setting::NotSet => (),
}
}
pub fn settings(
@ -607,6 +641,8 @@ pub fn settings(
.collect();
let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
let search_cutoff = index.search_cutoff(rtxn)?;
Ok(Settings {
displayed_attributes: match displayed_attributes {
Some(attrs) => Setting::Set(attrs),
@ -633,6 +669,10 @@ pub fn settings(
faceting: Setting::Set(faceting),
pagination: Setting::Set(pagination),
embedders,
search_cutoff: match search_cutoff {
Some(cutoff) => Setting::Set(cutoff),
None => Setting::Reset,
},
_kind: PhantomData,
})
}
@ -783,6 +823,7 @@ pub(crate) mod test {
faceting: Setting::NotSet,
pagination: Setting::NotSet,
embedders: Setting::NotSet,
search_cutoff: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};
@ -809,6 +850,7 @@ pub(crate) mod test {
faceting: Setting::NotSet,
pagination: Setting::NotSet,
embedders: Setting::NotSet,
search_cutoff: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};

View File

@ -624,6 +624,25 @@ fn embedder_analytics(
)
}
// Invokes `make_setting_route!` for the search cutoff setting, passing:
// the "/search_cutoff" path, `patch` as the update method, `u64` as the value
// type, `DeserrJsonError<InvalidSettingsSearchCutoff>` as the error type, the
// `search_cutoff` attribute together with its "search_cutoff" string form, and
// an analytics callback.
// NOTE(review): the handlers the macro expands to are not visible from this
// invocation — confirm against the macro definition.
make_setting_route!(
"/search_cutoff",
patch,
u64,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoff,
>,
search_cutoff,
"search_cutoff",
analytics,
// Publishes a "Search Cutoff Updated" event whose payload carries the
// optional `u64` handed to the callback.
|setting: &Option<u64>, req: &HttpRequest| {
analytics.publish(
"Search Cutoff Updated".to_string(),
serde_json::json!({"search_cutoff": setting }),
Some(req),
);
}
);
macro_rules! generate_configure {
($($mod:ident),*) => {
pub fn configure(cfg: &mut web::ServiceConfig) {
@ -765,7 +784,8 @@ pub async fn update_all(
"synonyms": {
"total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()),
},
"embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set())
"embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()),
"search_cutoff": new_settings.search_cutoff.as_ref().set(),
}),
Some(&req),
);

View File

@ -496,8 +496,11 @@ pub fn perform_search(
distribution: Option<DistributionShift>,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let time_budget = TimeBudget::new(Duration::from_millis(150));
let rtxn = index.read_txn()?;
let time_budget = match index.search_cutoff(&rtxn)? {
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
None => TimeBudget::default(),
};
let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &query, features, distribution, time_budget)?;

View File

@ -77,7 +77,8 @@ async fn import_dump_v1_movie_raw() {
},
"pagination": {
"maxTotalHits": 1000
}
},
"searchCutoff": null
}
"###
);
@ -238,7 +239,8 @@ async fn import_dump_v1_movie_with_settings() {
},
"pagination": {
"maxTotalHits": 1000
}
},
"searchCutoff": null
}
"###
);
@ -385,7 +387,8 @@ async fn import_dump_v1_rubygems_with_settings() {
},
"pagination": {
"maxTotalHits": 1000
}
},
"searchCutoff": null
}
"###
);
@ -518,7 +521,8 @@ async fn import_dump_v2_movie_raw() {
},
"pagination": {
"maxTotalHits": 1000
}
},
"searchCutoff": null
}
"###
);
@ -663,7 +667,8 @@ async fn import_dump_v2_movie_with_settings() {
},
"pagination": {
"maxTotalHits": 1000
}
},
"searchCutoff": null
}
"###
);
@ -807,7 +812,8 @@ async fn import_dump_v2_rubygems_with_settings() {
},
"pagination": {
"maxTotalHits": 1000
}
},
"searchCutoff": null
}
"###
);
@ -940,7 +946,8 @@ async fn import_dump_v3_movie_raw() {
},
"pagination": {
"maxTotalHits": 1000
}
},
"searchCutoff": null
}
"###
);
@ -1085,7 +1092,8 @@ async fn import_dump_v3_movie_with_settings() {
},
"pagination": {
"maxTotalHits": 1000
}
},
"searchCutoff": null
}
"###
);
@ -1229,7 +1237,8 @@ async fn import_dump_v3_rubygems_with_settings() {
},
"pagination": {
"maxTotalHits": 1000
}
},
"searchCutoff": null
}
"###
);
@ -1362,7 +1371,8 @@ async fn import_dump_v4_movie_raw() {
},
"pagination": {
"maxTotalHits": 1000
}
},
"searchCutoff": null
}
"###
);
@ -1507,7 +1517,8 @@ async fn import_dump_v4_movie_with_settings() {
},
"pagination": {
"maxTotalHits": 1000
}
},
"searchCutoff": null
}
"###
);
@ -1651,7 +1662,8 @@ async fn import_dump_v4_rubygems_with_settings() {
},
"pagination": {
"maxTotalHits": 1000
}
},
"searchCutoff": null
}
"###
);
@ -1895,7 +1907,8 @@ async fn import_dump_v6_containing_experimental_features() {
},
"pagination": {
"maxTotalHits": 1000
}
},
"searchCutoff": null
}
"###);

View File

@ -49,12 +49,12 @@ async fn get_settings_unexisting_index() {
async fn get_settings() {
let server = Server::new().await;
let index = server.index("test");
index.create(None).await;
index.wait_task(0).await;
let (response, _code) = index.create(None).await;
index.wait_task(response.uid()).await;
let (response, code) = index.settings().await;
assert_eq!(code, 200);
let settings = response.as_object().unwrap();
assert_eq!(settings.keys().len(), 15);
assert_eq!(settings.keys().len(), 16);
assert_eq!(settings["displayedAttributes"], json!(["*"]));
assert_eq!(settings["searchableAttributes"], json!(["*"]));
assert_eq!(settings["filterableAttributes"], json!([]));
@ -84,6 +84,7 @@ async fn get_settings() {
})
);
assert_eq!(settings["proximityPrecision"], json!("byWord"));
assert_eq!(settings["searchCutoff"], json!(null));
}
#[actix_rt::test]

View File

@ -67,6 +67,7 @@ pub mod main_key {
pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits";
pub const PROXIMITY_PRECISION: &str = "proximity-precision";
pub const EMBEDDING_CONFIGS: &str = "embedding_configs";
pub const SEARCH_CUTOFF: &str = "search_cutoff";
}
pub mod db_name {
@ -1505,6 +1506,18 @@ impl Index {
_ => "default".to_owned(),
})
}
/// Stores `cutoff` in the main database under `main_key::SEARCH_CUTOFF`.
///
/// The key is encoded with the `Str` codec and the value with the `BEU64` codec.
/// Any error reported by the underlying `put` is returned unchanged.
pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> {
    let main = self.main.remap_types::<Str, BEU64>();
    main.put(wtxn, main_key::SEARCH_CUTOFF, &cutoff)
}
/// Reads the search cutoff stored under `main_key::SEARCH_CUTOFF`.
///
/// Returns `Ok(None)` when no value is stored for that key, and `Ok(Some(cutoff))`
/// otherwise. The value is decoded with the `BEU64` codec; a failing read is
/// propagated as an error.
pub fn search_cutoff(&self, rtxn: &RoTxn<'_>) -> Result<Option<u64>> {
    let main = self.main.remap_types::<Str, BEU64>();
    let cutoff = main.get(rtxn, main_key::SEARCH_CUTOFF)?;
    Ok(cutoff)
}
/// Deletes the entry stored under `main_key::SEARCH_CUTOFF` from the main database.
///
/// The boolean produced by the underlying `delete` call is returned as is.
/// NOTE(review): presumably `true` means an entry was actually removed — confirm
/// against the heed documentation.
pub(crate) fn delete_search_cutoff(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
    let main = self.main.remap_key_type::<Str>();
    main.delete(wtxn, main_key::SEARCH_CUTOFF)
}
}
#[cfg(test)]

View File

@ -121,6 +121,12 @@ impl fmt::Debug for TimeBudget {
}
}
impl Default for TimeBudget {
    /// Builds a budget of 150 milliseconds by delegating to [`TimeBudget::new`].
    fn default() -> Self {
        // Budget used when the caller does not supply one.
        const DEFAULT_BUDGET_MS: u64 = 150;

        let budget = std::time::Duration::from_millis(DEFAULT_BUDGET_MS);
        Self::new(budget)
    }
}
impl TimeBudget {
pub fn new(budget: std::time::Duration) -> Self {
Self { started_at: std::time::Instant::now(), budget }

View File

@ -150,6 +150,7 @@ pub struct Settings<'a, 't, 'i> {
pagination_max_total_hits: Setting<usize>,
proximity_precision: Setting<ProximityPrecision>,
embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>,
search_cutoff: Setting<u64>,
}
impl<'a, 't, 'i> Settings<'a, 't, 'i> {
@ -183,6 +184,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
pagination_max_total_hits: Setting::NotSet,
proximity_precision: Setting::NotSet,
embedder_settings: Setting::NotSet,
search_cutoff: Setting::NotSet,
indexer_config,
}
}
@ -373,6 +375,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.embedder_settings = Setting::Reset;
}
/// Records that the search cutoff must be set to `value`.
///
/// Only the pending `search_cutoff` field of this builder is modified here.
pub fn set_search_cutoff(&mut self, value: u64) {
    let requested = Setting::Set(value);
    self.search_cutoff = requested;
}
/// Records that the search cutoff must be reset.
///
/// Only the pending `search_cutoff` field of this builder is modified here.
pub fn reset_search_cutoff(&mut self) {
    let cleared = Setting::Reset;
    self.search_cutoff = cleared;
}
#[tracing::instrument(
level = "trace"
skip(self, progress_callback, should_abort, old_fields_ids_map),
@ -1026,6 +1036,24 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
Ok(update)
}
/// Applies the pending search cutoff change to the index.
///
/// - `Setting::NotSet`: nothing is touched and `Ok(false)` is returned.
/// - `Setting::Reset`: the stored cutoff is deleted and the boolean produced by
///   `delete_search_cutoff` is returned.
/// - `Setting::Set(new)`: the stored cutoff is read first; when it already equals
///   `new`, nothing is written and `Ok(false)` is returned, otherwise `new` is
///   stored and `Ok(true)` is returned.
///
/// Errors from reading, writing or deleting the value are propagated.
fn update_search_cutoff(&mut self) -> Result<bool> {
    match self.search_cutoff {
        Setting::NotSet => Ok(false),
        Setting::Reset => Ok(self.index.delete_search_cutoff(self.wtxn)?),
        Setting::Set(new) => {
            // Skip the write when the stored value is already the requested one.
            if self.index.search_cutoff(self.wtxn)? == Some(new) {
                return Ok(false);
            }
            self.index.put_search_cutoff(self.wtxn, new)?;
            Ok(true)
        }
    }
}
pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()>
where
FP: Fn(UpdateIndexingStep) + Sync,
@ -1071,6 +1099,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
// 3. Keep the old vectors but reattempt indexing on a prompt change: only actually changed prompt will need embedding + storage
let embedding_configs_updated = self.update_embedding_configs()?;
// updating the search cutoff never triggers a re-indexing, so the returned flag is ignored
self.update_search_cutoff()?;
if stop_words_updated
|| non_separator_tokens_updated
|| separator_tokens_updated
@ -2027,6 +2058,7 @@ mod tests {
pagination_max_total_hits,
proximity_precision,
embedder_settings,
search_cutoff,
} = settings;
assert!(matches!(searchable_fields, Setting::NotSet));
assert!(matches!(displayed_fields, Setting::NotSet));
@ -2050,6 +2082,7 @@ mod tests {
assert!(matches!(pagination_max_total_hits, Setting::NotSet));
assert!(matches!(proximity_precision, Setting::NotSet));
assert!(matches!(embedder_settings, Setting::NotSet));
assert!(matches!(search_cutoff, Setting::NotSet));
})
.unwrap();
}