5008: Display vectors when no custom vectors where ever provided r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes the issue reported on [Discord](https://discord.com/channels/1006923006964154428/1294653031958446080/1295336784896589967).

## What does this PR do?
- Normal behavior of Meilisearch is to hide `_vectors` even when `retrieveVectors: true` when there is an explicit list of displayed attributes that does not contain vectors
- However, this relied on the field id for the `_vectors` field to exist, which wasn't the case when no `_vectors` was manually provided to documents. This would often be the case for people using autoembedders such as the OpenAI integration.
- This PR fixes the behavior by looking for the `_vectors` string in the `displayedAttributes` when there is no `_vectors` fid.
- This PR also adds a test for this specific situation, that would fail before the PR, and pass after the PR


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-10-15 13:08:47 +00:00 committed by GitHub
commit 75b2f22add
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 58 additions and 2 deletions

View File

@ -1195,8 +1195,13 @@ impl<'a> HitMaker<'a> {
let vectors_is_hidden = match (&displayed_ids, vectors_fid) {
// displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid
(None, _) => false,
// displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field
(Some(_), None) => true,
// vectors has no fid, so check its explicit name
(Some(_), None) => {
// unwrap as otherwise we'd go to the first one
let displayed_names = index.displayed_fields(rtxn)?.unwrap();
!displayed_names
.contains(&milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME)
}
// displayed_ids is a finit list, so hide if `_vectors` is not part of it
(Some(map), Some(vectors_fid)) => map.contains(&vectors_fid),
};

View File

@ -568,6 +568,57 @@ async fn retrieve_vectors() {
]
"###);
// use explicit `_vectors` in displayed attributes
let (response, code) = index
.update_settings(json!({ "displayedAttributes": ["id", "title", "desc", "_vectors"]} ))
.await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###"
[
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
},
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
}
]
"###);
// remove `_vectors` from displayed attributes
let (response, code) =
index.update_settings(json!({ "displayedAttributes": ["id", "title", "desc"]} )).await;