4537: Expose distribution shift in settings r=ManyTheFish a=dureuill

See [usage page](https://meilisearch.notion.site/v1-8-AI-search-API-usage-135552d6e85a4a52bc7109be82aeca42#d652adc0890445658aaf36352dbc8802)

# Changes

- Distribution shift added to all embedders.
- Distribution shift is exposed in settings.
- Changed the reindexing logic so that a reindex operation is not triggered when only the distribution shift or the API key changes.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-04-03 09:08:58 +00:00 committed by GitHub
commit 56bf8503db
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 231 additions and 95 deletions

View file

@ -2652,6 +2652,7 @@ mod tests {
path_to_embeddings: Setting::NotSet,
embedding_object: Setting::NotSet,
input_type: Setting::NotSet,
distribution: Setting::NotSet,
}),
);
settings.set_embedder_settings(embedders);

View file

@ -976,7 +976,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
match joined {
// updated config
EitherOrBoth::Both((name, mut old), (_, new)) => {
changed |= old.apply(new);
changed |= EmbeddingSettings::apply_and_need_reindex(&mut old, new);
if changed {
tracing::debug!(embedder = name, "need reindex");
} else {
tracing::debug!(embedder = name, "skip reindex");
}
let new = validate_embedding_settings(old, &name)?;
new_configs.insert(name, new);
}
@ -1169,6 +1174,7 @@ fn validate_prompt(
path_to_embeddings,
embedding_object,
input_type,
distribution,
}) => {
// validate
let template = crate::prompt::Prompt::new(template)
@ -1188,6 +1194,7 @@ fn validate_prompt(
path_to_embeddings,
embedding_object,
input_type,
distribution,
}))
}
new => Ok(new),
@ -1213,6 +1220,7 @@ pub fn validate_embedding_settings(
path_to_embeddings,
embedding_object,
input_type,
distribution,
} = settings;
if let Some(0) = dimensions.set() {
@ -1244,6 +1252,7 @@ pub fn validate_embedding_settings(
path_to_embeddings,
embedding_object,
input_type,
distribution,
}));
};
match inferred_source {
@ -1388,6 +1397,7 @@ pub fn validate_embedding_settings(
path_to_embeddings,
embedding_object,
input_type,
distribution,
}))
}