Merge pull request #5449 from vuthanhtung2412/fix-dim-mismatch

Display more detailed error message instead of panic on embeddings dimension mismatch
This commit is contained in:
Tamo 2025-03-27 10:52:23 +00:00 committed by GitHub
commit 5607802fe1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 103 additions and 7 deletions

View File

@ -100,7 +100,7 @@ async fn add_remove_user_provided() {
let (documents, _code) = index let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await; .await;
snapshot!(json_string!(documents), @r###" snapshot!(json_string!(documents), @r#"
{ {
"results": [ "results": [
{ {
@ -134,7 +134,7 @@ async fn add_remove_user_provided() {
"limit": 20, "limit": 20,
"total": 2 "total": 2
} }
"###); "#);
let (value, code) = index.delete_document(0).await; let (value, code) = index.delete_document(0).await;
snapshot!(code, @"202 Accepted"); snapshot!(code, @"202 Accepted");
@ -143,7 +143,7 @@ async fn add_remove_user_provided() {
let (documents, _code) = index let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await; .await;
snapshot!(json_string!(documents), @r###" snapshot!(json_string!(documents), @r#"
{ {
"results": [ "results": [
{ {
@ -161,6 +161,97 @@ async fn add_remove_user_provided() {
"limit": 20, "limit": 20,
"total": 1 "total": 1
} }
"#);
}
// Regression test for user-provided vectors whose length does not match the
// embedder's configured `dimensions`: the indexing task is expected to end in
// the `failed` status with an `invalid_vector_dimensions` error.
#[actix_rt::test]
async fn user_provide_mismatched_embedding_dimension() {
let server = Server::new().await;
let index = server.index("doggo");
// Declare a `userProvided` embedder named `manual` that expects 3 dimensions.
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
// A single vector holding 2 floats while the embedder is configured for 3.
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0] }},
]);
// The request itself is accepted (202); the mismatch only shows up in the task result.
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r#"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Invalid vector dimensions: expected: `3`, found: `2`.",
"code": "invalid_vector_dimensions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vector_dimensions"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"#);
// FIXME: /!\ A mismatch is still accepted when the total number of provided floats
// happens to be a multiple of `dimensions` (presumably the only condition checked;
// confirm against the indexing code). Below, 3 embeddings of 2 floats each (6 floats)
// are indexed successfully and come back re-chunked as 2 embeddings of 3 floats.
let new_document = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [[0, 0], [1, 1], [2, 2]] }},
]);
let (response, code) = index.add_documents(new_document, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
// Read the document back with its vectors to pin the currently stored (re-chunked) layout.
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
0.0,
0.0,
1.0
],
[
1.0,
2.0,
2.0
]
],
"regenerate": false
}
}
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###); "###);
} }
@ -678,7 +769,7 @@ async fn add_remove_one_vector_4588() {
let (documents, _code) = index let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await; .await;
snapshot!(json_string!(documents), @r###" snapshot!(json_string!(documents), @r#"
{ {
"results": [ "results": [
{ {
@ -696,5 +787,5 @@ async fn add_remove_one_vector_4588() {
"limit": 20, "limit": 20,
"total": 1 "total": 1
} }
"###); "#);
} }

View File

@ -13,7 +13,7 @@ use crate::index::IndexEmbeddingConfig;
use crate::progress::Progress; use crate::progress::Progress;
use crate::update::settings::InnerIndexSettings; use crate::update::settings::InnerIndexSettings;
use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings}; use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings};
use crate::{Error, Index, InternalError, Result}; use crate::{Error, Index, InternalError, Result, UserError};
pub fn write_to_db( pub fn write_to_db(
mut writer_receiver: WriterBbqueueReceiver<'_>, mut writer_receiver: WriterBbqueueReceiver<'_>,
@ -218,7 +218,12 @@ pub fn write_from_bbqueue(
arroy_writers.get(&embedder_id).expect("requested a missing embedder"); arroy_writers.get(&embedder_id).expect("requested a missing embedder");
let mut embeddings = Embeddings::new(*dimensions); let mut embeddings = Embeddings::new(*dimensions);
let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding); let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding);
embeddings.append(all_embeddings.to_vec()).unwrap(); if embeddings.append(all_embeddings.to_vec()).is_err() {
return Err(Error::UserError(UserError::InvalidVectorDimensions {
expected: *dimensions,
found: all_embeddings.len(),
}));
}
writer.del_items(wtxn, *dimensions, docid)?; writer.del_items(wtxn, *dimensions, docid)?;
writer.add_items(wtxn, docid, &embeddings)?; writer.add_items(wtxn, docid, &embeddings)?;
} }