mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-03-19 06:08:20 +01:00
Merge #5401
5401: Make composite embedders an experimental feature r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5343

## What does this PR do?
- Introduce a new `compositeEmbedders` experimental feature
- Guard `source = "composite"`, `searchEmbedder`, and `indexingEmbedder` behind enabling the feature
- Update tests accordingly

## Dumpless upgrade
- Adding an experimental feature is never a breaking change

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
commit
d0dda78f3d
@ -118,6 +118,19 @@ impl RoFeatures {
|
|||||||
.into())
|
.into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn check_composite_embedders(&self, disabled_action: &'static str) -> Result<()> {
|
||||||
|
if self.runtime.composite_embedders {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(FeatureNotEnabledError {
|
||||||
|
disabled_action,
|
||||||
|
feature: "composite embedders",
|
||||||
|
issue_link: "https://github.com/orgs/meilisearch/discussions/816",
|
||||||
|
}
|
||||||
|
.into())
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FeatureData {
|
impl FeatureData {
|
||||||
|
@ -11,6 +11,7 @@ pub struct RuntimeTogglableFeatures {
|
|||||||
pub contains_filter: bool,
|
pub contains_filter: bool,
|
||||||
pub network: bool,
|
pub network: bool,
|
||||||
pub get_task_documents_route: bool,
|
pub get_task_documents_route: bool,
|
||||||
|
pub composite_embedders: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default, Debug, Clone, Copy)]
|
#[derive(Default, Debug, Clone, Copy)]
|
||||||
|
@ -198,6 +198,7 @@ struct Infos {
|
|||||||
experimental_limit_batched_tasks_total_size: u64,
|
experimental_limit_batched_tasks_total_size: u64,
|
||||||
experimental_network: bool,
|
experimental_network: bool,
|
||||||
experimental_get_task_documents_route: bool,
|
experimental_get_task_documents_route: bool,
|
||||||
|
experimental_composite_embedders: bool,
|
||||||
gpu_enabled: bool,
|
gpu_enabled: bool,
|
||||||
db_path: bool,
|
db_path: bool,
|
||||||
import_dump: bool,
|
import_dump: bool,
|
||||||
@ -290,6 +291,7 @@ impl Infos {
|
|||||||
contains_filter,
|
contains_filter,
|
||||||
network,
|
network,
|
||||||
get_task_documents_route,
|
get_task_documents_route,
|
||||||
|
composite_embedders,
|
||||||
} = features;
|
} = features;
|
||||||
|
|
||||||
// We're going to override every sensible information.
|
// We're going to override every sensible information.
|
||||||
@ -309,6 +311,7 @@ impl Infos {
|
|||||||
experimental_reduce_indexing_memory_usage,
|
experimental_reduce_indexing_memory_usage,
|
||||||
experimental_network: network,
|
experimental_network: network,
|
||||||
experimental_get_task_documents_route: get_task_documents_route,
|
experimental_get_task_documents_route: get_task_documents_route,
|
||||||
|
experimental_composite_embedders: composite_embedders,
|
||||||
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
|
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
|
||||||
db_path: db_path != PathBuf::from("./data.ms"),
|
db_path: db_path != PathBuf::from("./data.ms"),
|
||||||
import_dump: import_dump.is_some(),
|
import_dump: import_dump.is_some(),
|
||||||
|
@ -52,6 +52,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
|||||||
contains_filter: Some(false),
|
contains_filter: Some(false),
|
||||||
network: Some(false),
|
network: Some(false),
|
||||||
get_task_documents_route: Some(false),
|
get_task_documents_route: Some(false),
|
||||||
|
composite_embedders: Some(false),
|
||||||
})),
|
})),
|
||||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||||
{
|
{
|
||||||
@ -94,6 +95,8 @@ pub struct RuntimeTogglableFeatures {
|
|||||||
pub network: Option<bool>,
|
pub network: Option<bool>,
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
pub get_task_documents_route: Option<bool>,
|
pub get_task_documents_route: Option<bool>,
|
||||||
|
#[deserr(default)]
|
||||||
|
pub composite_embedders: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
|
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
|
||||||
@ -105,6 +108,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
|||||||
contains_filter,
|
contains_filter,
|
||||||
network,
|
network,
|
||||||
get_task_documents_route,
|
get_task_documents_route,
|
||||||
|
composite_embedders,
|
||||||
} = value;
|
} = value;
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
@ -114,6 +118,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
|||||||
contains_filter: Some(contains_filter),
|
contains_filter: Some(contains_filter),
|
||||||
network: Some(network),
|
network: Some(network),
|
||||||
get_task_documents_route: Some(get_task_documents_route),
|
get_task_documents_route: Some(get_task_documents_route),
|
||||||
|
composite_embedders: Some(composite_embedders),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -126,6 +131,7 @@ pub struct PatchExperimentalFeatureAnalytics {
|
|||||||
contains_filter: bool,
|
contains_filter: bool,
|
||||||
network: bool,
|
network: bool,
|
||||||
get_task_documents_route: bool,
|
get_task_documents_route: bool,
|
||||||
|
composite_embedders: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Aggregate for PatchExperimentalFeatureAnalytics {
|
impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||||
@ -141,6 +147,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
|||||||
contains_filter: new.contains_filter,
|
contains_filter: new.contains_filter,
|
||||||
network: new.network,
|
network: new.network,
|
||||||
get_task_documents_route: new.get_task_documents_route,
|
get_task_documents_route: new.get_task_documents_route,
|
||||||
|
composite_embedders: new.composite_embedders,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -165,6 +172,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
|||||||
contains_filter: Some(false),
|
contains_filter: Some(false),
|
||||||
network: Some(false),
|
network: Some(false),
|
||||||
get_task_documents_route: Some(false),
|
get_task_documents_route: Some(false),
|
||||||
|
composite_embedders: Some(false),
|
||||||
})),
|
})),
|
||||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||||
{
|
{
|
||||||
@ -202,6 +210,10 @@ async fn patch_features(
|
|||||||
.0
|
.0
|
||||||
.get_task_documents_route
|
.get_task_documents_route
|
||||||
.unwrap_or(old_features.get_task_documents_route),
|
.unwrap_or(old_features.get_task_documents_route),
|
||||||
|
composite_embedders: new_features
|
||||||
|
.0
|
||||||
|
.composite_embedders
|
||||||
|
.unwrap_or(old_features.composite_embedders),
|
||||||
};
|
};
|
||||||
|
|
||||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||||
@ -214,6 +226,7 @@ async fn patch_features(
|
|||||||
contains_filter,
|
contains_filter,
|
||||||
network,
|
network,
|
||||||
get_task_documents_route,
|
get_task_documents_route,
|
||||||
|
composite_embedders,
|
||||||
} = new_features;
|
} = new_features;
|
||||||
|
|
||||||
analytics.publish(
|
analytics.publish(
|
||||||
@ -224,6 +237,7 @@ async fn patch_features(
|
|||||||
contains_filter,
|
contains_filter,
|
||||||
network,
|
network,
|
||||||
get_task_documents_route,
|
get_task_documents_route,
|
||||||
|
composite_embedders,
|
||||||
},
|
},
|
||||||
&req,
|
&req,
|
||||||
);
|
);
|
||||||
|
@ -716,7 +716,30 @@ pub async fn delete_all(
|
|||||||
|
|
||||||
fn validate_settings(
|
fn validate_settings(
|
||||||
settings: Settings<Unchecked>,
|
settings: Settings<Unchecked>,
|
||||||
_index_scheduler: &IndexScheduler,
|
index_scheduler: &IndexScheduler,
|
||||||
) -> Result<Settings<Unchecked>, ResponseError> {
|
) -> Result<Settings<Unchecked>, ResponseError> {
|
||||||
|
use meilisearch_types::milli::update::Setting;
|
||||||
|
use meilisearch_types::milli::vector::settings::EmbedderSource;
|
||||||
|
|
||||||
|
let features = index_scheduler.features();
|
||||||
|
if let Setting::Set(embedders) = &settings.embedders {
|
||||||
|
for SettingEmbeddingSettings { inner: embedder } in embedders.values() {
|
||||||
|
let Setting::Set(embedder) = embedder else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
if matches!(embedder.source, Setting::Set(EmbedderSource::Composite)) {
|
||||||
|
features.check_composite_embedders("using `\"composite\"` as source")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
if matches!(embedder.search_embedder, Setting::Set(_)) {
|
||||||
|
features.check_composite_embedders("setting `searchEmbedder`")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
if matches!(embedder.indexing_embedder, Setting::Set(_)) {
|
||||||
|
features.check_composite_embedders("setting `indexingEmbedder`")?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Ok(settings.validate()?)
|
Ok(settings.validate()?)
|
||||||
}
|
}
|
||||||
|
@ -2132,7 +2132,8 @@ async fn import_dump_v6_containing_experimental_features() {
|
|||||||
"editDocumentsByFunction": false,
|
"editDocumentsByFunction": false,
|
||||||
"containsFilter": false,
|
"containsFilter": false,
|
||||||
"network": false,
|
"network": false,
|
||||||
"getTaskDocumentsRoute": false
|
"getTaskDocumentsRoute": false,
|
||||||
|
"compositeEmbedders": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -2254,7 +2255,8 @@ async fn import_dump_v6_containing_batches_and_enqueued_tasks() {
|
|||||||
"editDocumentsByFunction": false,
|
"editDocumentsByFunction": false,
|
||||||
"containsFilter": false,
|
"containsFilter": false,
|
||||||
"network": false,
|
"network": false,
|
||||||
"getTaskDocumentsRoute": false
|
"getTaskDocumentsRoute": false,
|
||||||
|
"compositeEmbedders": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -2358,7 +2360,8 @@ async fn generate_and_import_dump_containing_vectors() {
|
|||||||
"editDocumentsByFunction": false,
|
"editDocumentsByFunction": false,
|
||||||
"containsFilter": false,
|
"containsFilter": false,
|
||||||
"network": false,
|
"network": false,
|
||||||
"getTaskDocumentsRoute": false
|
"getTaskDocumentsRoute": false,
|
||||||
|
"compositeEmbedders": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
|
@ -23,7 +23,8 @@ async fn experimental_features() {
|
|||||||
"editDocumentsByFunction": false,
|
"editDocumentsByFunction": false,
|
||||||
"containsFilter": false,
|
"containsFilter": false,
|
||||||
"network": false,
|
"network": false,
|
||||||
"getTaskDocumentsRoute": false
|
"getTaskDocumentsRoute": false,
|
||||||
|
"compositeEmbedders": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -37,7 +38,8 @@ async fn experimental_features() {
|
|||||||
"editDocumentsByFunction": false,
|
"editDocumentsByFunction": false,
|
||||||
"containsFilter": false,
|
"containsFilter": false,
|
||||||
"network": false,
|
"network": false,
|
||||||
"getTaskDocumentsRoute": false
|
"getTaskDocumentsRoute": false,
|
||||||
|
"compositeEmbedders": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -51,7 +53,8 @@ async fn experimental_features() {
|
|||||||
"editDocumentsByFunction": false,
|
"editDocumentsByFunction": false,
|
||||||
"containsFilter": false,
|
"containsFilter": false,
|
||||||
"network": false,
|
"network": false,
|
||||||
"getTaskDocumentsRoute": false
|
"getTaskDocumentsRoute": false,
|
||||||
|
"compositeEmbedders": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -66,7 +69,8 @@ async fn experimental_features() {
|
|||||||
"editDocumentsByFunction": false,
|
"editDocumentsByFunction": false,
|
||||||
"containsFilter": false,
|
"containsFilter": false,
|
||||||
"network": false,
|
"network": false,
|
||||||
"getTaskDocumentsRoute": false
|
"getTaskDocumentsRoute": false,
|
||||||
|
"compositeEmbedders": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -81,7 +85,8 @@ async fn experimental_features() {
|
|||||||
"editDocumentsByFunction": false,
|
"editDocumentsByFunction": false,
|
||||||
"containsFilter": false,
|
"containsFilter": false,
|
||||||
"network": false,
|
"network": false,
|
||||||
"getTaskDocumentsRoute": false
|
"getTaskDocumentsRoute": false,
|
||||||
|
"compositeEmbedders": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
@ -103,7 +108,8 @@ async fn experimental_feature_metrics() {
|
|||||||
"editDocumentsByFunction": false,
|
"editDocumentsByFunction": false,
|
||||||
"containsFilter": false,
|
"containsFilter": false,
|
||||||
"network": false,
|
"network": false,
|
||||||
"getTaskDocumentsRoute": false
|
"getTaskDocumentsRoute": false,
|
||||||
|
"compositeEmbedders": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -158,7 +164,7 @@ async fn errors() {
|
|||||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`",
|
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`, `compositeEmbedders`",
|
||||||
"code": "bad_request",
|
"code": "bad_request",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||||
|
@ -412,6 +412,117 @@ async fn ollama_url_checks() {
|
|||||||
async fn composite_checks() {
|
async fn composite_checks() {
|
||||||
let server = Server::new().await;
|
let server = Server::new().await;
|
||||||
let index = server.index("test");
|
let index = server.index("test");
|
||||||
|
// feature not enabled, using source
|
||||||
|
let (response, _code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"test": null
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"test": {
|
||||||
|
"source": "composite",
|
||||||
|
"searchEmbedder": {
|
||||||
|
"source": "huggingFace",
|
||||||
|
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||||
|
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||||
|
},
|
||||||
|
"indexingEmbedder": {
|
||||||
|
"source": "huggingFace",
|
||||||
|
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||||
|
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(response, @r###"
|
||||||
|
{
|
||||||
|
"message": "using `\"composite\"` as source requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
|
||||||
|
"code": "feature_not_enabled",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// feature not enabled, using search embedder
|
||||||
|
let (response, _code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"test": null
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"test": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"searchEmbedder": {
|
||||||
|
"source": "huggingFace",
|
||||||
|
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||||
|
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(response, @r###"
|
||||||
|
{
|
||||||
|
"message": "setting `searchEmbedder` requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
|
||||||
|
"code": "feature_not_enabled",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// feature not enabled, using indexing embedder
|
||||||
|
let (response, _code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"test": null
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"test": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"indexingEmbedder": {
|
||||||
|
"source": "huggingFace",
|
||||||
|
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||||
|
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(response, @r###"
|
||||||
|
{
|
||||||
|
"message": "setting `indexingEmbedder` requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
|
||||||
|
"code": "feature_not_enabled",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// enable feature
|
||||||
|
let (_, code) = server.set_features(json!({"compositeEmbedders": true})).await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
|
||||||
// inner distribution
|
// inner distribution
|
||||||
let (response, _code) = index
|
let (response, _code) = index
|
||||||
.update_settings(json!({
|
.update_settings(json!({
|
||||||
|
Loading…
x
Reference in New Issue
Block a user