5401: Make composite embedders an experimental feature r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5343 

## What does this PR do?
- Introduce new `compositeEmbedders` experimental feature
- Guard `source = "composite"` and `searchEmbedder`, `indexingEmbedder` behind enabling the feature.
- Update tests accordingly

## Dumpless upgrade

- Adding an experimental feature is never a breaking change

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2025-03-11 14:20:36 +00:00 committed by GitHub
commit d0dda78f3d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 185 additions and 11 deletions

View File

@ -118,6 +118,19 @@ impl RoFeatures {
.into())
}
}
pub fn check_composite_embedders(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.composite_embedders {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action,
feature: "composite embedders",
issue_link: "https://github.com/orgs/meilisearch/discussions/816",
}
.into())
}
}
}
impl FeatureData {

View File

@ -11,6 +11,7 @@ pub struct RuntimeTogglableFeatures {
pub contains_filter: bool,
pub network: bool,
pub get_task_documents_route: bool,
pub composite_embedders: bool,
}
#[derive(Default, Debug, Clone, Copy)]

View File

@ -198,6 +198,7 @@ struct Infos {
experimental_limit_batched_tasks_total_size: u64,
experimental_network: bool,
experimental_get_task_documents_route: bool,
experimental_composite_embedders: bool,
gpu_enabled: bool,
db_path: bool,
import_dump: bool,
@ -290,6 +291,7 @@ impl Infos {
contains_filter,
network,
get_task_documents_route,
composite_embedders,
} = features;
// We're going to override every sensible information.
@ -309,6 +311,7 @@ impl Infos {
experimental_reduce_indexing_memory_usage,
experimental_network: network,
experimental_get_task_documents_route: get_task_documents_route,
experimental_composite_embedders: composite_embedders,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),

View File

@ -52,6 +52,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
contains_filter: Some(false),
network: Some(false),
get_task_documents_route: Some(false),
composite_embedders: Some(false),
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
@ -94,6 +95,8 @@ pub struct RuntimeTogglableFeatures {
pub network: Option<bool>,
#[deserr(default)]
pub get_task_documents_route: Option<bool>,
#[deserr(default)]
pub composite_embedders: Option<bool>,
}
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
@ -105,6 +108,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
contains_filter,
network,
get_task_documents_route,
composite_embedders,
} = value;
Self {
@ -114,6 +118,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
contains_filter: Some(contains_filter),
network: Some(network),
get_task_documents_route: Some(get_task_documents_route),
composite_embedders: Some(composite_embedders),
}
}
}
@ -126,6 +131,7 @@ pub struct PatchExperimentalFeatureAnalytics {
contains_filter: bool,
network: bool,
get_task_documents_route: bool,
composite_embedders: bool,
}
impl Aggregate for PatchExperimentalFeatureAnalytics {
@ -141,6 +147,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
contains_filter: new.contains_filter,
network: new.network,
get_task_documents_route: new.get_task_documents_route,
composite_embedders: new.composite_embedders,
})
}
@ -165,6 +172,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
contains_filter: Some(false),
network: Some(false),
get_task_documents_route: Some(false),
composite_embedders: Some(false),
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
@ -202,6 +210,10 @@ async fn patch_features(
.0
.get_task_documents_route
.unwrap_or(old_features.get_task_documents_route),
composite_embedders: new_features
.0
.composite_embedders
.unwrap_or(old_features.composite_embedders),
};
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
@ -214,6 +226,7 @@ async fn patch_features(
contains_filter,
network,
get_task_documents_route,
composite_embedders,
} = new_features;
analytics.publish(
@ -224,6 +237,7 @@ async fn patch_features(
contains_filter,
network,
get_task_documents_route,
composite_embedders,
},
&req,
);

View File

@ -716,7 +716,30 @@ pub async fn delete_all(
fn validate_settings(
settings: Settings<Unchecked>,
_index_scheduler: &IndexScheduler,
index_scheduler: &IndexScheduler,
) -> Result<Settings<Unchecked>, ResponseError> {
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::vector::settings::EmbedderSource;
let features = index_scheduler.features();
if let Setting::Set(embedders) = &settings.embedders {
for SettingEmbeddingSettings { inner: embedder } in embedders.values() {
let Setting::Set(embedder) = embedder else {
continue;
};
if matches!(embedder.source, Setting::Set(EmbedderSource::Composite)) {
features.check_composite_embedders("using `\"composite\"` as source")?;
}
if matches!(embedder.search_embedder, Setting::Set(_)) {
features.check_composite_embedders("setting `searchEmbedder`")?;
}
if matches!(embedder.indexing_embedder, Setting::Set(_)) {
features.check_composite_embedders("setting `indexingEmbedder`")?;
}
}
}
Ok(settings.validate()?)
}

View File

@ -2132,7 +2132,8 @@ async fn import_dump_v6_containing_experimental_features() {
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"getTaskDocumentsRoute": false,
"compositeEmbedders": false
}
"###);
@ -2254,7 +2255,8 @@ async fn import_dump_v6_containing_batches_and_enqueued_tasks() {
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"getTaskDocumentsRoute": false,
"compositeEmbedders": false
}
"###);
@ -2358,7 +2360,8 @@ async fn generate_and_import_dump_containing_vectors() {
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"getTaskDocumentsRoute": false,
"compositeEmbedders": false
}
"###);

View File

@ -23,7 +23,8 @@ async fn experimental_features() {
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"getTaskDocumentsRoute": false,
"compositeEmbedders": false
}
"###);
@ -37,7 +38,8 @@ async fn experimental_features() {
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"getTaskDocumentsRoute": false,
"compositeEmbedders": false
}
"###);
@ -51,7 +53,8 @@ async fn experimental_features() {
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"getTaskDocumentsRoute": false,
"compositeEmbedders": false
}
"###);
@ -66,7 +69,8 @@ async fn experimental_features() {
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"getTaskDocumentsRoute": false,
"compositeEmbedders": false
}
"###);
@ -81,7 +85,8 @@ async fn experimental_features() {
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"getTaskDocumentsRoute": false,
"compositeEmbedders": false
}
"###);
}
@ -103,7 +108,8 @@ async fn experimental_feature_metrics() {
"editDocumentsByFunction": false,
"containsFilter": false,
"network": false,
"getTaskDocumentsRoute": false
"getTaskDocumentsRoute": false,
"compositeEmbedders": false
}
"###);
@ -158,7 +164,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`",
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`, `compositeEmbedders`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@ -412,6 +412,117 @@ async fn ollama_url_checks() {
async fn composite_checks() {
let server = Server::new().await;
let index = server.index("test");
// feature not enabled, using source
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "using `\"composite\"` as source requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// feature not enabled, using search embedder
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "userProvided",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
}
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "setting `searchEmbedder` requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// feature not enabled, using indexing embedder
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "userProvided",
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
}
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "setting `indexingEmbedder` requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// enable feature
let (_, code) = server.set_features(json!({"compositeEmbedders": true})).await;
snapshot!(code, @"200 OK");
// inner distribution
let (response, _code) = index
.update_settings(json!({