5401: Make composite embedders an experimental feature r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5343 

## What does this PR do?
- Introduce new `compositeEmbedders` experimental feature
- Guard `source = "composite"`, `searchEmbedder`, and `indexingEmbedder` behind the new feature: using any of them requires enabling it first.
- Update tests accordingly

## Dumpless upgrade

- Adding an experimental feature is never a breaking change

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2025-03-11 14:20:36 +00:00 committed by GitHub
commit d0dda78f3d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 185 additions and 11 deletions

View File

@ -118,6 +118,19 @@ impl RoFeatures {
.into()) .into())
} }
} }
/// Checks that the `composite embedders` experimental feature is enabled.
///
/// `disabled_action` is a short description of what the caller was trying to
/// do (for example "setting `searchEmbedder`"); it is stored in the error
/// returned when the feature is off.
///
/// # Errors
///
/// Returns a `FeatureNotEnabledError` (converted with `.into()`) when the
/// `composite_embedders` runtime flag is `false`.
pub fn check_composite_embedders(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.composite_embedders {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action,
feature: "composite embedders",
issue_link: "https://github.com/orgs/meilisearch/discussions/816",
}
.into())
}
}
} }
impl FeatureData { impl FeatureData {

View File

@ -11,6 +11,7 @@ pub struct RuntimeTogglableFeatures {
pub contains_filter: bool, pub contains_filter: bool,
pub network: bool, pub network: bool,
pub get_task_documents_route: bool, pub get_task_documents_route: bool,
pub composite_embedders: bool,
} }
#[derive(Default, Debug, Clone, Copy)] #[derive(Default, Debug, Clone, Copy)]

View File

@ -198,6 +198,7 @@ struct Infos {
experimental_limit_batched_tasks_total_size: u64, experimental_limit_batched_tasks_total_size: u64,
experimental_network: bool, experimental_network: bool,
experimental_get_task_documents_route: bool, experimental_get_task_documents_route: bool,
experimental_composite_embedders: bool,
gpu_enabled: bool, gpu_enabled: bool,
db_path: bool, db_path: bool,
import_dump: bool, import_dump: bool,
@ -290,6 +291,7 @@ impl Infos {
contains_filter, contains_filter,
network, network,
get_task_documents_route, get_task_documents_route,
composite_embedders,
} = features; } = features;
// We're going to override all sensitive information. // We're going to override all sensitive information.
@ -309,6 +311,7 @@ impl Infos {
experimental_reduce_indexing_memory_usage, experimental_reduce_indexing_memory_usage,
experimental_network: network, experimental_network: network,
experimental_get_task_documents_route: get_task_documents_route, experimental_get_task_documents_route: get_task_documents_route,
experimental_composite_embedders: composite_embedders,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(), gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"), db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(), import_dump: import_dump.is_some(),

View File

@ -52,6 +52,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
contains_filter: Some(false), contains_filter: Some(false),
network: Some(false), network: Some(false),
get_task_documents_route: Some(false), get_task_documents_route: Some(false),
composite_embedders: Some(false),
})), })),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{ {
@ -94,6 +95,8 @@ pub struct RuntimeTogglableFeatures {
pub network: Option<bool>, pub network: Option<bool>,
#[deserr(default)] #[deserr(default)]
pub get_task_documents_route: Option<bool>, pub get_task_documents_route: Option<bool>,
#[deserr(default)]
pub composite_embedders: Option<bool>,
} }
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures { impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
@ -105,6 +108,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
contains_filter, contains_filter,
network, network,
get_task_documents_route, get_task_documents_route,
composite_embedders,
} = value; } = value;
Self { Self {
@ -114,6 +118,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
contains_filter: Some(contains_filter), contains_filter: Some(contains_filter),
network: Some(network), network: Some(network),
get_task_documents_route: Some(get_task_documents_route), get_task_documents_route: Some(get_task_documents_route),
composite_embedders: Some(composite_embedders),
} }
} }
} }
@ -126,6 +131,7 @@ pub struct PatchExperimentalFeatureAnalytics {
contains_filter: bool, contains_filter: bool,
network: bool, network: bool,
get_task_documents_route: bool, get_task_documents_route: bool,
composite_embedders: bool,
} }
impl Aggregate for PatchExperimentalFeatureAnalytics { impl Aggregate for PatchExperimentalFeatureAnalytics {
@ -141,6 +147,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
contains_filter: new.contains_filter, contains_filter: new.contains_filter,
network: new.network, network: new.network,
get_task_documents_route: new.get_task_documents_route, get_task_documents_route: new.get_task_documents_route,
composite_embedders: new.composite_embedders,
}) })
} }
@ -165,6 +172,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
contains_filter: Some(false), contains_filter: Some(false),
network: Some(false), network: Some(false),
get_task_documents_route: Some(false), get_task_documents_route: Some(false),
composite_embedders: Some(false),
})), })),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{ {
@ -202,6 +210,10 @@ async fn patch_features(
.0 .0
.get_task_documents_route .get_task_documents_route
.unwrap_or(old_features.get_task_documents_route), .unwrap_or(old_features.get_task_documents_route),
composite_embedders: new_features
.0
.composite_embedders
.unwrap_or(old_features.composite_embedders),
}; };
// explicitly destructure for analytics rather than using the `Serialize` implementation, because // explicitly destructure for analytics rather than using the `Serialize` implementation, because
@ -214,6 +226,7 @@ async fn patch_features(
contains_filter, contains_filter,
network, network,
get_task_documents_route, get_task_documents_route,
composite_embedders,
} = new_features; } = new_features;
analytics.publish( analytics.publish(
@ -224,6 +237,7 @@ async fn patch_features(
contains_filter, contains_filter,
network, network,
get_task_documents_route, get_task_documents_route,
composite_embedders,
}, },
&req, &req,
); );

View File

@ -716,7 +716,30 @@ pub async fn delete_all(
// Rejects embedder settings that rely on the composite embedders experimental
// feature while it is disabled, then runs the regular `settings.validate()`.
fn validate_settings( fn validate_settings(
settings: Settings<Unchecked>, settings: Settings<Unchecked>,
_index_scheduler: &IndexScheduler, index_scheduler: &IndexScheduler,
) -> Result<Settings<Unchecked>, ResponseError> { ) -> Result<Settings<Unchecked>, ResponseError> {
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::vector::settings::EmbedderSource;
let features = index_scheduler.features();
// Only inspect embedders when the request explicitly sets them.
if let Setting::Set(embedders) = &settings.embedders {
for SettingEmbeddingSettings { inner: embedder } in embedders.values() {
// Entries that are not `Setting::Set` carry no configuration to check.
let Setting::Set(embedder) = embedder else {
continue;
};
// Each of the three checks below returns early with a "feature not enabled"
// error naming the offending setting when the feature is off.
if matches!(embedder.source, Setting::Set(EmbedderSource::Composite)) {
features.check_composite_embedders("using `\"composite\"` as source")?;
}
if matches!(embedder.search_embedder, Setting::Set(_)) {
features.check_composite_embedders("setting `searchEmbedder`")?;
}
if matches!(embedder.indexing_embedder, Setting::Set(_)) {
features.check_composite_embedders("setting `indexingEmbedder`")?;
}
}
}
Ok(settings.validate()?) Ok(settings.validate()?)
} }

View File

@ -2132,7 +2132,8 @@ async fn import_dump_v6_containing_experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
@ -2254,7 +2255,8 @@ async fn import_dump_v6_containing_batches_and_enqueued_tasks() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
@ -2358,7 +2360,8 @@ async fn generate_and_import_dump_containing_vectors() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);

View File

@ -23,7 +23,8 @@ async fn experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
@ -37,7 +38,8 @@ async fn experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
@ -51,7 +53,8 @@ async fn experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
@ -66,7 +69,8 @@ async fn experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
@ -81,7 +85,8 @@ async fn experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
} }
@ -103,7 +108,8 @@ async fn experimental_feature_metrics() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
@ -158,7 +164,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###" meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{ {
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`", "message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`, `compositeEmbedders`",
"code": "bad_request", "code": "bad_request",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request" "link": "https://docs.meilisearch.com/errors#bad_request"

View File

@ -412,6 +412,117 @@ async fn ollama_url_checks() {
async fn composite_checks() { async fn composite_checks() {
let server = Server::new().await; let server = Server::new().await;
let index = server.index("test"); let index = server.index("test");
// feature not enabled, using source
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "using `\"composite\"` as source requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// feature not enabled, using search embedder
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "userProvided",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
}
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "setting `searchEmbedder` requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// feature not enabled, using indexing embedder
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "userProvided",
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
}
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "setting `indexingEmbedder` requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// enable feature
let (_, code) = server.set_features(json!({"compositeEmbedders": true})).await;
snapshot!(code, @"200 OK");
// inner distribution // inner distribution
let (response, _code) = index let (response, _code) = index
.update_settings(json!({ .update_settings(json!({