Introduce the new index chat settings

This commit is contained in:
Clément Renault 2025-05-21 11:07:06 +02:00
parent 439146289e
commit c6930c8819
No known key found for this signature in database
GPG key ID: F250A4C4E3AE5F5F
12 changed files with 227 additions and 21 deletions

View file

@ -387,7 +387,8 @@ VectorEmbeddingError , InvalidRequest , BAD_REQUEST ;
NotFoundSimilarId , InvalidRequest , BAD_REQUEST ;
InvalidDocumentEditionContext , InvalidRequest , BAD_REQUEST ;
InvalidDocumentEditionFunctionFilter , InvalidRequest , BAD_REQUEST ;
EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST
EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST ;
InvalidSettingsIndexChat , InvalidRequest , BAD_REQUEST
}
impl ErrorCode for JoinError {

View file

@ -11,6 +11,7 @@ use fst::IntoStreamer;
use milli::disabled_typos_terms::DisabledTyposTerms;
use milli::index::{IndexEmbeddingConfig, PrefixSearch};
use milli::proximity::ProximityPrecision;
pub use milli::update::ChatSettings;
use milli::update::Setting;
use milli::{Criterion, CriterionError, FilterableAttributesRule, Index, DEFAULT_VALUES_PER_FACET};
use serde::{Deserialize, Serialize, Serializer};
@ -199,72 +200,86 @@ pub struct Settings<T> {
#[deserr(default, error = DeserrJsonError<InvalidSettingsDisplayedAttributes>)]
#[schema(value_type = Option<Vec<String>>, example = json!(["id", "title", "description", "url"]))]
pub displayed_attributes: WildcardSetting,
/// Fields in which to search for matching query words sorted by order of importance.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSearchableAttributes>)]
#[schema(value_type = Option<Vec<String>>, example = json!(["title", "description"]))]
pub searchable_attributes: WildcardSetting,
/// Attributes to use for faceting and filtering. See [Filtering and Faceted Search](https://www.meilisearch.com/docs/learn/filtering_and_sorting/search_with_facet_filters).
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsFilterableAttributes>)]
#[schema(value_type = Option<Vec<FilterableAttributesRule>>, example = json!(["release_date", "genre"]))]
pub filterable_attributes: Setting<Vec<FilterableAttributesRule>>,
/// Attributes to use when sorting search results.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSortableAttributes>)]
#[schema(value_type = Option<Vec<String>>, example = json!(["release_date"]))]
pub sortable_attributes: Setting<BTreeSet<String>>,
/// List of ranking rules sorted by order of importance. The order is customizable.
/// [A list of ordered built-in ranking rules](https://www.meilisearch.com/docs/learn/relevancy/relevancy).
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsRankingRules>)]
#[schema(value_type = Option<Vec<String>>, example = json!([RankingRuleView::Words, RankingRuleView::Typo, RankingRuleView::Proximity, RankingRuleView::Attribute, RankingRuleView::Exactness]))]
pub ranking_rules: Setting<Vec<RankingRuleView>>,
/// List of words ignored when present in search queries.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsStopWords>)]
#[schema(value_type = Option<Vec<String>>, example = json!(["the", "a", "them", "their"]))]
pub stop_words: Setting<BTreeSet<String>>,
/// List of characters not delimiting where one term begins and ends.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsNonSeparatorTokens>)]
#[schema(value_type = Option<Vec<String>>, example = json!([" ", "\n"]))]
pub non_separator_tokens: Setting<BTreeSet<String>>,
/// List of characters delimiting where one term begins and ends.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSeparatorTokens>)]
#[schema(value_type = Option<Vec<String>>, example = json!(["S"]))]
pub separator_tokens: Setting<BTreeSet<String>>,
/// List of strings Meilisearch should parse as a single term.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsDictionary>)]
#[schema(value_type = Option<Vec<String>>, example = json!(["iPhone pro"]))]
pub dictionary: Setting<BTreeSet<String>>,
/// List of associated words treated similarly. A word associated to an array of word as synonyms.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSynonyms>)]
#[schema(value_type = Option<BTreeMap<String, Vec<String>>>, example = json!({ "he": ["she", "they", "them"], "phone": ["iPhone", "android"]}))]
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
/// Search returns documents with distinct (different) values of the given field.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsDistinctAttribute>)]
#[schema(value_type = Option<String>, example = json!("sku"))]
pub distinct_attribute: Setting<String>,
/// Precision level when calculating the proximity ranking rule.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsProximityPrecision>)]
#[schema(value_type = Option<String>, example = json!(ProximityPrecisionView::ByAttribute))]
pub proximity_precision: Setting<ProximityPrecisionView>,
/// Customize typo tolerance feature.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsTypoTolerance>)]
#[schema(value_type = Option<TypoSettings>, example = json!({ "enabled": true, "disableOnAttributes": ["title"]}))]
pub typo_tolerance: Setting<TypoSettings>,
/// Faceting settings.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsFaceting>)]
#[schema(value_type = Option<FacetingSettings>, example = json!({ "maxValuesPerFacet": 10, "sortFacetValuesBy": { "genre": FacetValuesSort::Count }}))]
pub faceting: Setting<FacetingSettings>,
/// Pagination settings.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsPagination>)]
@ -276,24 +291,34 @@ pub struct Settings<T> {
#[deserr(default, error = DeserrJsonError<InvalidSettingsEmbedders>)]
#[schema(value_type = Option<BTreeMap<String, SettingEmbeddingSettings>>)]
pub embedders: Setting<BTreeMap<String, SettingEmbeddingSettings>>,
/// Maximum duration of a search query.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSearchCutoffMs>)]
#[schema(value_type = Option<u64>, example = json!(50))]
pub search_cutoff_ms: Setting<u64>,
// NOTE(review): the schema example below is `json!(50)`, which does not match the declared
// `value_type` (a list of `LocalizedAttributesRuleView`); it looks copied from the
// `search_cutoff_ms` example — TODO replace with a representative list of rules.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsLocalizedAttributes>)]
#[schema(value_type = Option<Vec<LocalizedAttributesRuleView>>, example = json!(50))]
pub localized_attributes: Setting<Vec<LocalizedAttributesRuleView>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsFacetSearch>)]
#[schema(value_type = Option<bool>, example = json!(true))]
pub facet_search: Setting<bool>,
// NOTE(review): the schema example `json!("Hemlo")` does not look like a
// `PrefixSearchSettings` value — TODO confirm the accepted values and use one of them here.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsPrefixSearch>)]
#[schema(value_type = Option<PrefixSearchSettings>, example = json!("Hemlo"))]
pub prefix_search: Setting<PrefixSearchSettings>,
/// Customize the chat prompting.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsIndexChat>)]
#[schema(value_type = Option<ChatSettings>)]
pub chat: Setting<ChatSettings>,
#[serde(skip)]
#[deserr(skip)]
pub _kind: PhantomData<T>,
@ -359,6 +384,7 @@ impl Settings<Checked> {
localized_attributes: Setting::Reset,
facet_search: Setting::Reset,
prefix_search: Setting::Reset,
chat: Setting::Reset,
_kind: PhantomData,
}
}
@ -385,6 +411,7 @@ impl Settings<Checked> {
localized_attributes: localized_attributes_rules,
facet_search,
prefix_search,
chat,
_kind,
} = self;
@ -409,6 +436,7 @@ impl Settings<Checked> {
localized_attributes: localized_attributes_rules,
facet_search,
prefix_search,
chat,
_kind: PhantomData,
}
}
@ -459,6 +487,7 @@ impl Settings<Unchecked> {
localized_attributes: self.localized_attributes,
facet_search: self.facet_search,
prefix_search: self.prefix_search,
chat: self.chat,
_kind: PhantomData,
}
}
@ -533,8 +562,9 @@ impl Settings<Unchecked> {
Setting::Set(this)
}
},
prefix_search: other.prefix_search.or(self.prefix_search),
facet_search: other.facet_search.or(self.facet_search),
prefix_search: other.prefix_search.or(self.prefix_search),
chat: other.chat.clone().or(self.chat.clone()),
_kind: PhantomData,
}
}
@ -573,6 +603,7 @@ pub fn apply_settings_to_builder(
localized_attributes: localized_attributes_rules,
facet_search,
prefix_search,
chat,
_kind,
} = settings;
@ -783,6 +814,12 @@ pub fn apply_settings_to_builder(
Setting::Reset => builder.reset_facet_search(),
Setting::NotSet => (),
}
// Forward the chat setting to the builder: `Set` hands over a clone of the value,
// `Reset` calls the builder's reset, and `NotSet` leaves the builder untouched.
match chat {
Setting::Set(chat) => builder.set_chat(chat.clone()),
Setting::Reset => builder.reset_chat(),
Setting::NotSet => (),
}
}
pub enum SecretPolicy {
@ -880,14 +917,11 @@ pub fn settings(
})
.collect();
let embedders = Setting::Set(embedders);
// Read further settings from the index through `rtxn`; each `?` propagates a failure
// to the caller.
let search_cutoff_ms = index.search_cutoff(rtxn)?;
let localized_attributes_rules = index.localized_attributes_rules(rtxn)?;
// Here `?` comes first, so `map` converts the value that `prefix_search` yielded.
let prefix_search = index.prefix_search(rtxn)?.map(PrefixSearchSettings::from);
let facet_search = index.facet_search(rtxn)?;
// Here `map` comes before `?`: the conversion to `ChatSettings` is applied to the wrapped
// value returned by `chat_config`, and `?` then unwraps the converted result.
let chat = index.chat_config(rtxn).map(ChatSettings::from)?;
let mut settings = Settings {
displayed_attributes: match displayed_attributes {
@ -925,8 +959,9 @@ pub fn settings(
Some(rules) => Setting::Set(rules.into_iter().map(|r| r.into()).collect()),
None => Setting::Reset,
},
prefix_search: Setting::Set(prefix_search.unwrap_or_default()),
facet_search: Setting::Set(facet_search),
prefix_search: Setting::Set(prefix_search.unwrap_or_default()),
chat: Setting::Set(chat),
_kind: PhantomData,
};
@ -1154,6 +1189,7 @@ pub(crate) mod test {
search_cutoff_ms: Setting::NotSet,
facet_search: Setting::NotSet,
prefix_search: Setting::NotSet,
chat: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};
@ -1185,6 +1221,8 @@ pub(crate) mod test {
search_cutoff_ms: Setting::NotSet,
facet_search: Setting::NotSet,
prefix_search: Setting::NotSet,
chat: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};