try my best to make the sub-settings routes work; it doesn't work

This commit is contained in:
Tamo 2024-12-23 20:52:47 +01:00
parent 4eaa626bca
commit 0bf4157a75
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
9 changed files with 171 additions and 34 deletions

1
Cargo.lock generated
View File

@ -3471,6 +3471,7 @@ dependencies = [
"clap",
"crossbeam-channel",
"deserr",
"doc-comment",
"dump",
"either",
"file-store",

View File

@ -144,6 +144,25 @@ impl MergeWithError<milli::CriterionError> for DeserrJsonError<InvalidSettingsRa
}
}
/// Newtype-style wrapper around a `Setting<EmbeddingSettings>`.
///
/// The wrapped setting is stored in `inner`. The `#[schema(...)]` override below
/// documents it as an optional, inlined `EmbeddingSettings` in the generated schema,
/// and `#[serde(flatten)]` is applied to it for serde (de)serialization.
#[derive(Debug, Default, Serialize, Deserialize, PartialEq, Eq, Clone, ToSchema)]
pub struct SettingEmbeddingSettings {
#[serde(flatten)]
#[schema(inline, value_type = Option<crate::milli::vector::settings::EmbeddingSettings>)]
pub inner: Setting<crate::milli::vector::settings::EmbeddingSettings>,
}
impl<E: DeserializeError> Deserr<E> for SettingEmbeddingSettings {
fn deserialize_from_value<V: deserr::IntoValue>(
value: deserr::Value<V>,
location: ValuePointerRef,
) -> Result<Self, E> {
Setting::<crate::milli::vector::settings::EmbeddingSettings>::deserialize_from_value(
value, location,
)
.map(|inner| Self { inner })
}
}
/// Holds all the settings for an index. `T` can either be `Checked` if they represent settings
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the latter case, a
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
@ -237,7 +256,7 @@ pub struct Settings<T> {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsEmbedders>)]
#[schema(value_type = String)] // TODO: TAMO
pub embedders: Setting<BTreeMap<String, Setting<milli::vector::settings::EmbeddingSettings>>>,
pub embedders: Setting<BTreeMap<String, SettingEmbeddingSettings>>,
/// Maximum duration of a search query.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSearchCutoffMs>)]
@ -254,7 +273,6 @@ pub struct Settings<T> {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsPrefixSearch>)]
#[schema(value_type = Option<PrefixSearchSettings>, example = json!("Hemlo"))]
// TODO: TAMO
pub prefix_search: Setting<PrefixSearchSettings>,
#[serde(skip)]
@ -269,7 +287,7 @@ impl<T> Settings<T> {
};
for mut embedder in embedders.values_mut() {
let Setting::Set(embedder) = &mut embedder else {
let SettingEmbeddingSettings { inner: Setting::Set(embedder) } = &mut embedder else {
continue;
};
@ -434,8 +452,9 @@ impl Settings<Unchecked> {
let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
for (name, config) in configs.iter_mut() {
let config_to_check = std::mem::take(config);
let checked_config = milli::update::validate_embedding_settings(config_to_check, name)?;
*config = checked_config
let checked_config =
milli::update::validate_embedding_settings(config_to_check.inner, name)?;
*config = SettingEmbeddingSettings { inner: checked_config };
}
self.embedders = Setting::Set(configs);
Ok(self)
@ -713,7 +732,9 @@ pub fn apply_settings_to_builder(
}
match embedders {
Setting::Set(value) => builder.set_embedder_settings(value.clone()),
Setting::Set(value) => builder.set_embedder_settings(
value.iter().map(|(k, v)| (k.clone(), v.inner.clone())).collect(),
),
Setting::Reset => builder.reset_embedder_settings(),
Setting::NotSet => (),
}
@ -827,7 +848,9 @@ pub fn settings(
let embedders: BTreeMap<_, _> = index
.embedding_configs(rtxn)?
.into_iter()
.map(|IndexEmbeddingConfig { name, config, .. }| (name, Setting::Set(config.into())))
.map(|IndexEmbeddingConfig { name, config, .. }| {
(name, SettingEmbeddingSettings { inner: Setting::Set(config.into()) })
})
.collect();
let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
@ -886,7 +909,7 @@ pub fn settings(
Ok(settings)
}
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[derive(Debug, Clone, PartialEq, Eq, Deserr, ToSchema)]
#[deserr(try_from(&String) = FromStr::from_str -> CriterionError)]
pub enum RankingRuleView {
/// Sorted by decreasing number of matched query terms.
@ -982,7 +1005,7 @@ impl From<RankingRuleView> for Criterion {
}
}
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize, ToSchema)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
pub enum ProximityPrecisionView {

View File

@ -109,6 +109,7 @@ utoipa = { version = "5.2.0", features = ["actix_extras", "macros", "non_strict_
utoipa-scalar = { version = "0.2.0", features = ["actix-web"] }
utoipa-rapidoc = { version = "5.0.0", features = ["actix-web"] }
utoipa-redoc = { version = "5.0.0", features = ["actix-web"] }
doc-comment = "0.3.3"
[dev-dependencies]
actix-rt = "2.10.0"

View File

@ -43,6 +43,7 @@ mod similar_analytics;
(path = "/", api = documents::DocumentsApi),
(path = "/", api = facet_search::FacetSearchApi),
(path = "/", api = similar::SimilarApi),
(path = "/", api = settings::SettingsApi),
),
paths(list_indexes, create_index, get_index, update_index, delete_index, get_index_stats),
tags(

View File

@ -6,9 +6,12 @@ use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, SecretPolicy, Settings, Unchecked};
use meilisearch_types::settings::{
settings, SecretPolicy, SettingEmbeddingSettings, Settings, Unchecked,
};
use meilisearch_types::tasks::KindWithContent;
use tracing::debug;
use utoipa::OpenApi;
use super::settings_analytics::*;
use crate::analytics::Analytics;
@ -29,6 +32,20 @@ macro_rules! make_setting_routes {
make_setting_route!($route, $update_verb, $type, $err_ty, $attr, $camelcase_attr, $analytics);
)*
#[derive(OpenApi)]
#[openapi(
nest($((path = "/", api = $attr::$attr),)*),
// paths(/* update_all, get_all, delete_all,*/ $( $attr::get, $attr::update, $attr::delete,)*),
tags(
(
name = "Settings",
description = "Use the /settings route to customize search settings for a given index. You can either modify all index settings at once using the update settings endpoint, or use a child route to configure a single setting.",
external_docs(url = "https://www.meilisearch.com/docs/reference/api/settings"),
),
),
)]
pub struct SettingsApi;
pub fn configure(cfg: &mut web::ServiceConfig) {
use crate::extractors::sequential_extractor::SeqHandler;
cfg.service(
@ -62,7 +79,42 @@ macro_rules! make_setting_route {
use $crate::extractors::sequential_extractor::SeqHandler;
use $crate::Opt;
use $crate::routes::{is_dry_run, get_task_id, SummarizedTaskView};
#[allow(unused_imports)]
use super::*;
#[derive(OpenApi)]
#[openapi(
paths(get, update, delete,),
)]
pub struct $attr;
#[doc = $camelcase_attr]
#[utoipa::path(
delete,
path = "/",
tags = ["Indexes", "Settings"],
security(("Bearer" = ["settings.update", "settings.*", "*"])),
request_body = $type,
responses(
(status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!(
{
"taskUid": 147,
"indexUid": "movies",
"status": "enqueued",
"type": "settingsUpdate",
"enqueuedAt": "2024-08-08T17:05:55.791772Z"
}
)),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
)),
)
)]
pub async fn delete(
index_scheduler: GuardedData<
ActionPolicy<{ actions::SETTINGS_UPDATE }>,
@ -96,6 +148,34 @@ macro_rules! make_setting_route {
Ok(HttpResponse::Accepted().json(task))
}
#[doc = $camelcase_attr]
#[utoipa::path(
$update_verb,
path = "/",
tags = ["Indexes", "Settings"],
security(("Bearer" = ["settings.update", "settings.*", "*"])),
request_body = $type,
responses(
(status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!(
{
"taskUid": 147,
"indexUid": "movies",
"status": "enqueued",
"type": "settingsUpdate",
"enqueuedAt": "2024-08-08T17:05:55.791772Z"
}
)),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
)),
)
)]
pub async fn update(
index_scheduler: GuardedData<
ActionPolicy<{ actions::SETTINGS_UPDATE }>,
@ -151,6 +231,34 @@ macro_rules! make_setting_route {
Ok(HttpResponse::Accepted().json(task))
}
#[doc = $camelcase_attr]
#[utoipa::path(
get,
path = "/",
tags = ["Indexes", "Settings"],
security(("Bearer" = ["settings.get", "settings.*", "*"])),
request_body = $type,
responses(
(status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!(
{
"taskUid": 147,
"indexUid": "movies",
"status": "enqueued",
"type": "settingsUpdate",
"enqueuedAt": "2024-08-08T17:05:55.791772Z"
}
)),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
)),
)
)]
pub async fn get(
index_scheduler: GuardedData<
ActionPolicy<{ actions::SETTINGS_GET }>,
@ -359,7 +467,7 @@ make_setting_routes!(
{
route: "/embedders",
update_verb: patch,
value_type: std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>,
value_type: std::collections::BTreeMap<String, SettingEmbeddingSettings>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders,
>,

View File

@ -8,10 +8,9 @@ use std::collections::{BTreeMap, BTreeSet, HashSet};
use meilisearch_types::facet_values_sort::FacetValuesSort;
use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
use meilisearch_types::settings::{
FacetingSettings, PaginationSettings, PrefixSearchSettings, ProximityPrecisionView,
RankingRuleView, TypoSettings,
RankingRuleView, SettingEmbeddingSettings, TypoSettings,
};
use serde::Serialize;
@ -497,13 +496,13 @@ pub struct EmbeddersAnalytics {
}
impl EmbeddersAnalytics {
pub fn new(setting: Option<&BTreeMap<String, Setting<EmbeddingSettings>>>) -> Self {
pub fn new(setting: Option<&BTreeMap<String, SettingEmbeddingSettings>>) -> Self {
let mut sources = std::collections::HashSet::new();
if let Some(s) = &setting {
for source in s
.values()
.filter_map(|config| config.clone().set())
.filter_map(|config| config.inner.clone().set())
.filter_map(|config| config.source.set())
{
use meilisearch_types::milli::vector::settings::EmbedderSource;
@ -522,18 +521,18 @@ impl EmbeddersAnalytics {
sources: Some(sources),
document_template_used: setting.as_ref().map(|map| {
map.values()
.filter_map(|config| config.clone().set())
.filter_map(|config| config.inner.clone().set())
.any(|config| config.document_template.set().is_some())
}),
document_template_max_bytes: setting.as_ref().and_then(|map| {
map.values()
.filter_map(|config| config.clone().set())
.filter_map(|config| config.inner.clone().set())
.filter_map(|config| config.document_template_max_bytes.set())
.max()
}),
binary_quantization_used: setting.as_ref().map(|map| {
map.values()
.filter_map(|config| config.clone().set())
.filter_map(|config| config.inner.clone().set())
.any(|config| config.binary_quantized.set().is_some())
}),
}

View File

@ -10,7 +10,6 @@ use itertools::{EitherOrBoth, Itertools};
use roaring::RoaringBitmap;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use time::OffsetDateTime;
use utoipa::{PartialSchema, ToSchema};
use super::del_add::DelAddOperation;
use super::index_documents::{IndexDocumentsConfig, Transform};
@ -41,18 +40,6 @@ pub enum Setting<T> {
NotSet,
}
// Lets `Setting<T>` be used as a `ToSchema` type whenever `T` is one.
impl<T: ToSchema> ToSchema for Setting<T> {
// The schema name is taken directly from the wrapped type.
fn name() -> std::borrow::Cow<'static, str> {
T::name()
}
}
// Lets `Setting<T>` expose a schema whenever `T` does.
impl<T: PartialSchema> PartialSchema for Setting<T> {
// The schema is exactly the wrapped type's schema; the `Setting` wrapper adds nothing to it.
fn schema() -> utoipa::openapi::RefOr<utoipa::openapi::schema::Schema> {
T::schema()
}
}
impl<T, E> Deserr<E> for Setting<T>
where
T: Deserr<E>,

View File

@ -9,6 +9,7 @@ use heed::{RoTxn, RwTxn, Unspecified};
use ordered_float::OrderedFloat;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use self::error::{EmbedError, NewEmbedderError};
use crate::prompt::{Prompt, PromptData};
@ -710,18 +711,20 @@ impl Embedder {
///
/// The intended use is to make the similarity score more comparable to the regular ranking score.
/// This allows to correct effects where results are too "packed" around a certain value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize, Serialize)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize, Serialize, ToSchema)]
// Serde (de)serialization goes through `DistributionShiftSerializable` in both directions.
#[serde(from = "DistributionShiftSerializable")]
#[serde(into = "DistributionShiftSerializable")]
pub struct DistributionShift {
/// Value where the results are "packed".
///
/// Similarity scores are translated so that they are packed around 0.5 instead of
/// around this value.
// `OrderedFloat<f32>` is documented as a plain `f32` in the generated schema.
#[schema(value_type = f32)]
pub current_mean: OrderedFloat<f32>,
/// Standard deviation of a similarity score.
///
/// Set below 0.4 to make the results less packed around the mean, and above 0.4 to make them more packed.
// `OrderedFloat<f32>` is documented as a plain `f32` in the generated schema.
#[schema(value_type = f32)]
pub current_sigma: OrderedFloat<f32>,
}

View File

@ -4,6 +4,7 @@ use std::num::NonZeroUsize;
use deserr::Deserr;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use super::{ollama, openai, DistributionShift};
use crate::prompt::{default_max_bytes, PromptData};
@ -11,48 +12,61 @@ use crate::update::Setting;
use crate::vector::EmbeddingConfig;
use crate::UserError;
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
/// Settings describing a single embedder.
///
/// Every field is a [`Setting`]: it is left out of the serialized output when it is
/// not set, and falls back to its default when absent from the input. Both serde and
/// deserr use camelCase field names and reject unknown fields.
///
/// Each `#[schema(value_type = ...)]` override documents the field in the generated
/// schema as an `Option` of the type wrapped by the `Setting`, so the override must
/// always mirror the field's inner type.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(rename_all = camelCase, deny_unknown_fields)]
pub struct EmbeddingSettings {
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
    #[schema(value_type = Option<EmbedderSource>)]
    pub source: Setting<EmbedderSource>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
    #[schema(value_type = Option<String>)]
    pub model: Setting<String>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
    #[schema(value_type = Option<String>)]
    pub revision: Setting<String>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
    #[schema(value_type = Option<String>)]
    pub api_key: Setting<String>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
    // The field wraps a `usize`, so the schema must advertise an integer, not a string.
    #[schema(value_type = Option<usize>)]
    pub dimensions: Setting<usize>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
    #[schema(value_type = Option<bool>)]
    pub binary_quantized: Setting<bool>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
    // The field wraps a `String`, so the schema must advertise a string, not a boolean.
    #[schema(value_type = Option<String>)]
    pub document_template: Setting<String>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
    #[schema(value_type = Option<usize>)]
    pub document_template_max_bytes: Setting<usize>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
    #[schema(value_type = Option<String>)]
    pub url: Setting<String>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
    #[schema(value_type = Option<serde_json::Value>)]
    pub request: Setting<serde_json::Value>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
    #[schema(value_type = Option<serde_json::Value>)]
    pub response: Setting<serde_json::Value>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
    #[schema(value_type = Option<BTreeMap<String, String>>)]
    pub headers: Setting<BTreeMap<String, String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
    #[schema(value_type = Option<DistributionShift>)]
    pub distribution: Setting<DistributionShift>,
}
@ -539,7 +553,7 @@ impl EmbeddingSettings {
}
}
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(rename_all = camelCase, deny_unknown_fields)]
pub enum EmbedderSource {