mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-03-18 21:58:20 +01:00
Merge #5401
5401: Make composite embedders an experimental feature r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5343

## What does this PR do?
- Introduce new `compositeEmbedders` experimental feature
- Guard `source = "composite"` and `searchEmbedder`, `indexingEmbedder` behind enabling the feature.
- Update tests accordingly

## Dumpless upgrade
- Adding an experimental feature is never a breaking change

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
commit
d0dda78f3d
@ -118,6 +118,19 @@ impl RoFeatures {
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_composite_embedders(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.composite_embedders {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action,
|
||||
feature: "composite embedders",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/816",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FeatureData {
|
||||
|
@ -11,6 +11,7 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub contains_filter: bool,
|
||||
pub network: bool,
|
||||
pub get_task_documents_route: bool,
|
||||
pub composite_embedders: bool,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, Copy)]
|
||||
|
@ -198,6 +198,7 @@ struct Infos {
|
||||
experimental_limit_batched_tasks_total_size: u64,
|
||||
experimental_network: bool,
|
||||
experimental_get_task_documents_route: bool,
|
||||
experimental_composite_embedders: bool,
|
||||
gpu_enabled: bool,
|
||||
db_path: bool,
|
||||
import_dump: bool,
|
||||
@ -290,6 +291,7 @@ impl Infos {
|
||||
contains_filter,
|
||||
network,
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
} = features;
|
||||
|
||||
// We're going to override every piece of sensitive information.
|
||||
@ -309,6 +311,7 @@ impl Infos {
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_network: network,
|
||||
experimental_get_task_documents_route: get_task_documents_route,
|
||||
experimental_composite_embedders: composite_embedders,
|
||||
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
|
||||
db_path: db_path != PathBuf::from("./data.ms"),
|
||||
import_dump: import_dump.is_some(),
|
||||
|
@ -52,6 +52,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
contains_filter: Some(false),
|
||||
network: Some(false),
|
||||
get_task_documents_route: Some(false),
|
||||
composite_embedders: Some(false),
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
@ -94,6 +95,8 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub network: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub get_task_documents_route: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub composite_embedders: Option<bool>,
|
||||
}
|
||||
|
||||
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
|
||||
@ -105,6 +108,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
||||
contains_filter,
|
||||
network,
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
} = value;
|
||||
|
||||
Self {
|
||||
@ -114,6 +118,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
||||
contains_filter: Some(contains_filter),
|
||||
network: Some(network),
|
||||
get_task_documents_route: Some(get_task_documents_route),
|
||||
composite_embedders: Some(composite_embedders),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -126,6 +131,7 @@ pub struct PatchExperimentalFeatureAnalytics {
|
||||
contains_filter: bool,
|
||||
network: bool,
|
||||
get_task_documents_route: bool,
|
||||
composite_embedders: bool,
|
||||
}
|
||||
|
||||
impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
@ -141,6 +147,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
contains_filter: new.contains_filter,
|
||||
network: new.network,
|
||||
get_task_documents_route: new.get_task_documents_route,
|
||||
composite_embedders: new.composite_embedders,
|
||||
})
|
||||
}
|
||||
|
||||
@ -165,6 +172,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
contains_filter: Some(false),
|
||||
network: Some(false),
|
||||
get_task_documents_route: Some(false),
|
||||
composite_embedders: Some(false),
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
@ -202,6 +210,10 @@ async fn patch_features(
|
||||
.0
|
||||
.get_task_documents_route
|
||||
.unwrap_or(old_features.get_task_documents_route),
|
||||
composite_embedders: new_features
|
||||
.0
|
||||
.composite_embedders
|
||||
.unwrap_or(old_features.composite_embedders),
|
||||
};
|
||||
|
||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||
@ -214,6 +226,7 @@ async fn patch_features(
|
||||
contains_filter,
|
||||
network,
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
} = new_features;
|
||||
|
||||
analytics.publish(
|
||||
@ -224,6 +237,7 @@ async fn patch_features(
|
||||
contains_filter,
|
||||
network,
|
||||
get_task_documents_route,
|
||||
composite_embedders,
|
||||
},
|
||||
&req,
|
||||
);
|
||||
|
@ -716,7 +716,30 @@ pub async fn delete_all(
|
||||
|
||||
fn validate_settings(
|
||||
settings: Settings<Unchecked>,
|
||||
_index_scheduler: &IndexScheduler,
|
||||
index_scheduler: &IndexScheduler,
|
||||
) -> Result<Settings<Unchecked>, ResponseError> {
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::vector::settings::EmbedderSource;
|
||||
|
||||
let features = index_scheduler.features();
|
||||
if let Setting::Set(embedders) = &settings.embedders {
|
||||
for SettingEmbeddingSettings { inner: embedder } in embedders.values() {
|
||||
let Setting::Set(embedder) = embedder else {
|
||||
continue;
|
||||
};
|
||||
if matches!(embedder.source, Setting::Set(EmbedderSource::Composite)) {
|
||||
features.check_composite_embedders("using `\"composite\"` as source")?;
|
||||
}
|
||||
|
||||
if matches!(embedder.search_embedder, Setting::Set(_)) {
|
||||
features.check_composite_embedders("setting `searchEmbedder`")?;
|
||||
}
|
||||
|
||||
if matches!(embedder.indexing_embedder, Setting::Set(_)) {
|
||||
features.check_composite_embedders("setting `indexingEmbedder`")?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(settings.validate()?)
|
||||
}
|
||||
|
@ -2132,7 +2132,8 @@ async fn import_dump_v6_containing_experimental_features() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -2254,7 +2255,8 @@ async fn import_dump_v6_containing_batches_and_enqueued_tasks() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -2358,7 +2360,8 @@ async fn generate_and_import_dump_containing_vectors() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
|
@ -23,7 +23,8 @@ async fn experimental_features() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -37,7 +38,8 @@ async fn experimental_features() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -51,7 +53,8 @@ async fn experimental_features() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -66,7 +69,8 @@ async fn experimental_features() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -81,7 +85,8 @@ async fn experimental_features() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
}
|
||||
@ -103,7 +108,8 @@ async fn experimental_feature_metrics() {
|
||||
"editDocumentsByFunction": false,
|
||||
"containsFilter": false,
|
||||
"network": false,
|
||||
"getTaskDocumentsRoute": false
|
||||
"getTaskDocumentsRoute": false,
|
||||
"compositeEmbedders": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@ -158,7 +164,7 @@ async fn errors() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`",
|
||||
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`, `compositeEmbedders`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
|
@ -412,6 +412,117 @@ async fn ollama_url_checks() {
|
||||
async fn composite_checks() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
// feature not enabled, using source
|
||||
let (response, _code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"test": null
|
||||
}
|
||||
}))
|
||||
.await;
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"test": {
|
||||
"source": "composite",
|
||||
"searchEmbedder": {
|
||||
"source": "huggingFace",
|
||||
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||
},
|
||||
"indexingEmbedder": {
|
||||
"source": "huggingFace",
|
||||
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||
},
|
||||
}
|
||||
}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "using `\"composite\"` as source requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
|
||||
// feature not enabled, using search embedder
|
||||
let (response, _code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"test": null
|
||||
}
|
||||
}))
|
||||
.await;
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"test": {
|
||||
"source": "userProvided",
|
||||
"searchEmbedder": {
|
||||
"source": "huggingFace",
|
||||
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||
}
|
||||
}
|
||||
}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "setting `searchEmbedder` requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
|
||||
// feature not enabled, using indexing embedder
|
||||
let (response, _code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"test": null
|
||||
}
|
||||
}))
|
||||
.await;
|
||||
server.wait_task(response.uid()).await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"test": {
|
||||
"source": "userProvided",
|
||||
"indexingEmbedder": {
|
||||
"source": "huggingFace",
|
||||
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||
}
|
||||
}
|
||||
}
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "setting `indexingEmbedder` requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
|
||||
// enable feature
|
||||
let (_, code) = server.set_features(json!({"compositeEmbedders": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
|
||||
// inner distribution
|
||||
let (response, _code) = index
|
||||
.update_settings(json!({
|
||||
|
Loading…
x
Reference in New Issue
Block a user