5355: Support fetching the pooling method from the model configuration r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5354 

## What does this PR do?
- Fetches the pooling configuration from the model repository
- Use a pooling method that depends on the pooling configuration of that model.
- Allow overriding the pooling method with a new huggingFace embedder parameter `pooling`
  - for backward-compatibility with Meilisearch v1.13
  - for compatibility with embedders that exhibit the same behavior as Meilisearch v1.13
- Handle the default value of that new parameter
   - for compatibility, when importing a db/a dump, it should be set to `forceMean`
   - when (re)set from the settings for an embedder, it should be set to `useModel`


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2025-02-27 14:55:13 +00:00 committed by GitHub
commit c63c25a9a2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 302 additions and 55 deletions

View file

@ -2768,6 +2768,7 @@ mod tests {
source: Setting::Set(crate::vector::settings::EmbedderSource::UserProvided),
model: Setting::NotSet,
revision: Setting::NotSet,
pooling: Setting::NotSet,
api_key: Setting::NotSet,
dimensions: Setting::Set(3),
document_template: Setting::NotSet,

View file

@ -1676,6 +1676,7 @@ fn validate_prompt(
source,
model,
revision,
pooling,
api_key,
dimensions,
document_template: Setting::Set(template),
@ -1709,6 +1710,7 @@ fn validate_prompt(
source,
model,
revision,
pooling,
api_key,
dimensions,
document_template: Setting::Set(template),
@ -1735,6 +1737,7 @@ pub fn validate_embedding_settings(
source,
model,
revision,
pooling,
api_key,
dimensions,
document_template,
@ -1776,6 +1779,7 @@ pub fn validate_embedding_settings(
source,
model,
revision,
pooling,
api_key,
dimensions,
document_template,
@ -1791,6 +1795,7 @@ pub fn validate_embedding_settings(
match inferred_source {
EmbedderSource::OpenAi => {
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?;
check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
@ -1829,6 +1834,7 @@ pub fn validate_embedding_settings(
EmbedderSource::Ollama => {
check_set(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?;
check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
@ -1846,6 +1852,7 @@ pub fn validate_embedding_settings(
EmbedderSource::UserProvided => {
check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?;
check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?;
check_unset(
&document_template,
@ -1869,6 +1876,7 @@ pub fn validate_embedding_settings(
EmbedderSource::Rest => {
check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?;
check_set(&url, EmbeddingSettings::URL, inferred_source, name)?;
check_set(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
check_set(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
@ -1878,6 +1886,7 @@ pub fn validate_embedding_settings(
source,
model,
revision,
pooling,
api_key,
dimensions,
document_template,