diff --git a/Cargo.lock b/Cargo.lock index fdb799787..fb405d891 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3471,6 +3471,7 @@ dependencies = [ "clap", "crossbeam-channel", "deserr", + "doc-comment", "dump", "either", "file-store", diff --git a/crates/meilisearch-types/src/settings.rs b/crates/meilisearch-types/src/settings.rs index f7216a0cf..a5416583b 100644 --- a/crates/meilisearch-types/src/settings.rs +++ b/crates/meilisearch-types/src/settings.rs @@ -144,6 +144,25 @@ impl MergeWithError for DeserrJsonError)] + pub inner: Setting, +} + +impl Deserr for SettingEmbeddingSettings { + fn deserialize_from_value( + value: deserr::Value, + location: ValuePointerRef, + ) -> Result { + Setting::::deserialize_from_value( + value, location, + ) + .map(|inner| Self { inner }) + } +} + /// Holds all the settings for an index. `T` can either be `Checked` if they represents settings /// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a /// call to `check` will return a `Settings` from a `Settings`. @@ -237,7 +256,7 @@ pub struct Settings { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = String)] // TODO: TAMO - pub embedders: Setting>>, + pub embedders: Setting>, /// Maximum duration of a search query. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] @@ -254,7 +273,6 @@ pub struct Settings { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!("Hemlo"))] - // TODO: TAMO pub prefix_search: Setting, #[serde(skip)] @@ -269,7 +287,7 @@ impl Settings { }; for mut embedder in embedders.values_mut() { - let Setting::Set(embedder) = &mut embedder else { + let SettingEmbeddingSettings { inner: Setting::Set(embedder) } = &mut embedder else { continue; }; @@ -434,8 +452,9 @@ impl Settings { let Setting::Set(mut configs) = self.embedders else { return Ok(self) }; for (name, config) in configs.iter_mut() { let config_to_check = std::mem::take(config); - let checked_config = milli::update::validate_embedding_settings(config_to_check, name)?; - *config = checked_config + let checked_config = + milli::update::validate_embedding_settings(config_to_check.inner, name)?; + *config = SettingEmbeddingSettings { inner: checked_config }; } self.embedders = Setting::Set(configs); Ok(self) @@ -713,7 +732,9 @@ pub fn apply_settings_to_builder( } match embedders { - Setting::Set(value) => builder.set_embedder_settings(value.clone()), + Setting::Set(value) => builder.set_embedder_settings( + value.iter().map(|(k, v)| (k.clone(), v.inner.clone())).collect(), + ), Setting::Reset => builder.reset_embedder_settings(), Setting::NotSet => (), } @@ -827,7 +848,9 @@ pub fn settings( let embedders: BTreeMap<_, _> = index .embedding_configs(rtxn)? .into_iter() - .map(|IndexEmbeddingConfig { name, config, .. }| (name, Setting::Set(config.into()))) + .map(|IndexEmbeddingConfig { name, config, .. }| { + (name, SettingEmbeddingSettings { inner: Setting::Set(config.into()) }) + }) .collect(); let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) }; @@ -886,7 +909,7 @@ pub fn settings( Ok(settings) } -#[derive(Debug, Clone, PartialEq, Eq, Deserr)] +#[derive(Debug, Clone, PartialEq, Eq, Deserr, ToSchema)] #[deserr(try_from(&String) = FromStr::from_str -> CriterionError)] pub enum RankingRuleView { /// Sorted by decreasing number of matched query terms. @@ -982,7 +1005,7 @@ impl From for Criterion { } } -#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)] +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize, ToSchema)] #[serde(deny_unknown_fields, rename_all = "camelCase")] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub enum ProximityPrecisionView { diff --git a/crates/meilisearch/Cargo.toml b/crates/meilisearch/Cargo.toml index 5094e6807..2b6583526 100644 --- a/crates/meilisearch/Cargo.toml +++ b/crates/meilisearch/Cargo.toml @@ -109,6 +109,7 @@ utoipa = { version = "5.2.0", features = ["actix_extras", "macros", "non_strict_ utoipa-scalar = { version = "0.2.0", features = ["actix-web"] } utoipa-rapidoc = { version = "5.0.0", features = ["actix-web"] } utoipa-redoc = { version = "5.0.0", features = ["actix-web"] } +doc-comment = "0.3.3" [dev-dependencies] actix-rt = "2.10.0" diff --git a/crates/meilisearch/src/routes/indexes/mod.rs b/crates/meilisearch/src/routes/indexes/mod.rs index b2f949da9..c6f2a9397 100644 --- a/crates/meilisearch/src/routes/indexes/mod.rs +++ b/crates/meilisearch/src/routes/indexes/mod.rs @@ -43,6 +43,7 @@ mod similar_analytics; (path = "/", api = documents::DocumentsApi), (path = "/", api = facet_search::FacetSearchApi), (path = "/", api = similar::SimilarApi), + (path = "/", api = settings::SettingsApi), ), paths(list_indexes, create_index, get_index, update_index, delete_index, get_index_stats), tags( diff --git a/crates/meilisearch/src/routes/indexes/settings.rs b/crates/meilisearch/src/routes/indexes/settings.rs index b2922e5ff..cf104ee99 100644 --- a/crates/meilisearch/src/routes/indexes/settings.rs +++ b/crates/meilisearch/src/routes/indexes/settings.rs @@ -6,9 +6,12 @@ use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::update::Setting; -use meilisearch_types::settings::{settings, SecretPolicy, Settings, Unchecked}; +use meilisearch_types::settings::{ + settings, SecretPolicy, SettingEmbeddingSettings, Settings, Unchecked, +}; use meilisearch_types::tasks::KindWithContent; use tracing::debug; +use utoipa::OpenApi; use super::settings_analytics::*; use crate::analytics::Analytics; @@ -29,6 +32,20 @@ macro_rules! make_setting_routes { make_setting_route!($route, $update_verb, $type, $err_ty, $attr, $camelcase_attr, $analytics); )* + #[derive(OpenApi)] + #[openapi( + nest($((path = "/", api = $attr::$attr),)*), + // paths(/* update_all, get_all, delete_all,*/ $( $attr::get, $attr::update, $attr::delete,)*), + tags( + ( + name = "Settings", + description = "Use the /settings route to customize search settings for a given index. You can either modify all index settings at once using the update settings endpoint, or use a child route to configure a single setting.", + external_docs(url = "https://www.meilisearch.com/docs/reference/api/settings"), + ), + ), + )] + pub struct SettingsApi; + pub fn configure(cfg: &mut web::ServiceConfig) { use crate::extractors::sequential_extractor::SeqHandler; cfg.service( @@ -62,7 +79,42 @@ macro_rules! make_setting_route { use $crate::extractors::sequential_extractor::SeqHandler; use $crate::Opt; use $crate::routes::{is_dry_run, get_task_id, SummarizedTaskView}; + #[allow(unused_imports)] + use super::*; + #[derive(OpenApi)] + #[openapi( + paths(get, update, delete,), + )] + pub struct $attr; + + #[doc = $camelcase_attr] + #[utoipa::path( + delete, + path = "/", + tags = ["Indexes", "Settings"], + security(("Bearer" = ["settings.update", "settings.*", "*"])), + request_body = $type, + responses( + (status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": "movies", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) + )] pub async fn delete( index_scheduler: GuardedData< ActionPolicy<{ actions::SETTINGS_UPDATE }>, @@ -96,6 +148,34 @@ macro_rules! make_setting_route { Ok(HttpResponse::Accepted().json(task)) } + + #[doc = $camelcase_attr] + #[utoipa::path( + $update_verb, + path = "/", + tags = ["Indexes", "Settings"], + security(("Bearer" = ["settings.update", "settings.*", "*"])), + request_body = $type, + responses( + (status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": "movies", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) + )] pub async fn update( index_scheduler: GuardedData< ActionPolicy<{ actions::SETTINGS_UPDATE }>, @@ -151,6 +231,34 @@ macro_rules! make_setting_route { Ok(HttpResponse::Accepted().json(task)) } + + #[doc = $camelcase_attr] + #[utoipa::path( + get, + path = "/", + tags = ["Indexes", "Settings"], + security(("Bearer" = ["settings.get", "settings.*", "*"])), + request_body = $type, + responses( + (status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": "movies", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) + )] pub async fn get( index_scheduler: GuardedData< ActionPolicy<{ actions::SETTINGS_GET }>, @@ -359,7 +467,7 @@ make_setting_routes!( { route: "/embedders", update_verb: patch, - value_type: std::collections::BTreeMap>, + value_type: std::collections::BTreeMap, err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders, >, diff --git a/crates/meilisearch/src/routes/indexes/settings_analytics.rs b/crates/meilisearch/src/routes/indexes/settings_analytics.rs index ddca2c00a..ffeadcab6 100644 --- a/crates/meilisearch/src/routes/indexes/settings_analytics.rs +++ b/crates/meilisearch/src/routes/indexes/settings_analytics.rs @@ -8,10 +8,9 @@ use std::collections::{BTreeMap, BTreeSet, HashSet}; use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView}; use meilisearch_types::milli::update::Setting; -use meilisearch_types::milli::vector::settings::EmbeddingSettings; use meilisearch_types::settings::{ FacetingSettings, PaginationSettings, PrefixSearchSettings, ProximityPrecisionView, - RankingRuleView, TypoSettings, + RankingRuleView, SettingEmbeddingSettings, TypoSettings, }; use serde::Serialize; @@ -497,13 +496,13 @@ pub struct EmbeddersAnalytics { } impl EmbeddersAnalytics { - pub fn new(setting: Option<&BTreeMap>>) -> Self { + pub fn new(setting: Option<&BTreeMap>) -> Self { let mut sources = std::collections::HashSet::new(); if let Some(s) = &setting { for source in s .values() - .filter_map(|config| config.clone().set()) + .filter_map(|config| config.inner.clone().set()) .filter_map(|config| config.source.set()) { use meilisearch_types::milli::vector::settings::EmbedderSource; @@ -522,18 +521,18 @@ impl EmbeddersAnalytics { sources: Some(sources), document_template_used: setting.as_ref().map(|map| { map.values() - .filter_map(|config| config.clone().set()) + .filter_map(|config| config.inner.clone().set()) .any(|config| config.document_template.set().is_some()) }), document_template_max_bytes: setting.as_ref().and_then(|map| { map.values() - .filter_map(|config| config.clone().set()) + .filter_map(|config| config.inner.clone().set()) .filter_map(|config| config.document_template_max_bytes.set()) .max() }), binary_quantization_used: setting.as_ref().map(|map| { map.values() - .filter_map(|config| config.clone().set()) + .filter_map(|config| config.inner.clone().set()) .any(|config| config.binary_quantized.set().is_some()) }), } diff --git a/crates/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs index 3592e74e3..85259c2d0 100644 --- a/crates/milli/src/update/settings.rs +++ b/crates/milli/src/update/settings.rs @@ -10,7 +10,6 @@ use itertools::{EitherOrBoth, Itertools}; use roaring::RoaringBitmap; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use time::OffsetDateTime; -use utoipa::{PartialSchema, ToSchema}; use super::del_add::DelAddOperation; use super::index_documents::{IndexDocumentsConfig, Transform}; @@ -41,18 +40,6 @@ pub enum Setting { NotSet, } -impl ToSchema for Setting { - fn name() -> std::borrow::Cow<'static, str> { - T::name() - } -} - -impl PartialSchema for Setting { - fn schema() -> utoipa::openapi::RefOr { - T::schema() - } -} - impl Deserr for Setting where T: Deserr, diff --git a/crates/milli/src/vector/mod.rs b/crates/milli/src/vector/mod.rs index a1d71ef93..0be698027 100644 --- a/crates/milli/src/vector/mod.rs +++ b/crates/milli/src/vector/mod.rs @@ -9,6 +9,7 @@ use heed::{RoTxn, RwTxn, Unspecified}; use ordered_float::OrderedFloat; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; use self::error::{EmbedError, NewEmbedderError}; use crate::prompt::{Prompt, PromptData}; @@ -710,18 +711,20 @@ impl Embedder { /// /// The intended use is to make the similarity score more comparable to the regular ranking score. /// This allows to correct effects where results are too "packed" around a certain value. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize, Serialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize, Serialize, ToSchema)] #[serde(from = "DistributionShiftSerializable")] #[serde(into = "DistributionShiftSerializable")] pub struct DistributionShift { /// Value where the results are "packed". /// /// Similarity scores are translated so that they are packed around 0.5 instead + #[schema(value_type = f32)] pub current_mean: OrderedFloat, /// standard deviation of a similarity score. /// /// Set below 0.4 to make the results less packed around the mean, and above 0.4 to make them more packed. + #[schema(value_type = f32)] pub current_sigma: OrderedFloat, } diff --git a/crates/milli/src/vector/settings.rs b/crates/milli/src/vector/settings.rs index d1cf364a2..4a1b1882c 100644 --- a/crates/milli/src/vector/settings.rs +++ b/crates/milli/src/vector/settings.rs @@ -4,6 +4,7 @@ use std::num::NonZeroUsize; use deserr::Deserr; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; use super::{ollama, openai, DistributionShift}; use crate::prompt::{default_max_bytes, PromptData}; @@ -11,48 +12,61 @@ use crate::update::Setting; use crate::vector::EmbeddingConfig; use crate::UserError; -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] #[serde(deny_unknown_fields, rename_all = "camelCase")] #[deserr(rename_all = camelCase, deny_unknown_fields)] pub struct EmbeddingSettings { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub source: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub model: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub revision: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub api_key: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub dimensions: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub binary_quantized: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub document_template: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub document_template_max_bytes: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub url: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub request: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub response: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option>)] pub headers: Setting>, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub distribution: Setting, } @@ -539,7 +553,7 @@ impl EmbeddingSettings { } } -#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] +#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] #[serde(deny_unknown_fields, rename_all = "camelCase")] #[deserr(rename_all = camelCase, deny_unknown_fields)] pub enum EmbedderSource {