mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-05-19 10:42:54 +02:00
Merge pull request #5449 from vuthanhtung2412/fix-dim-mismatch
Display more detailed error message instead of panic on embeddings dimension mismatch
This commit is contained in:
commit
5607802fe1
@ -100,7 +100,7 @@ async fn add_remove_user_provided() {
|
|||||||
let (documents, _code) = index
|
let (documents, _code) = index
|
||||||
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||||
.await;
|
.await;
|
||||||
snapshot!(json_string!(documents), @r###"
|
snapshot!(json_string!(documents), @r#"
|
||||||
{
|
{
|
||||||
"results": [
|
"results": [
|
||||||
{
|
{
|
||||||
@ -134,7 +134,7 @@ async fn add_remove_user_provided() {
|
|||||||
"limit": 20,
|
"limit": 20,
|
||||||
"total": 2
|
"total": 2
|
||||||
}
|
}
|
||||||
"###);
|
"#);
|
||||||
|
|
||||||
let (value, code) = index.delete_document(0).await;
|
let (value, code) = index.delete_document(0).await;
|
||||||
snapshot!(code, @"202 Accepted");
|
snapshot!(code, @"202 Accepted");
|
||||||
@ -143,7 +143,7 @@ async fn add_remove_user_provided() {
|
|||||||
let (documents, _code) = index
|
let (documents, _code) = index
|
||||||
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||||
.await;
|
.await;
|
||||||
snapshot!(json_string!(documents), @r###"
|
snapshot!(json_string!(documents), @r#"
|
||||||
{
|
{
|
||||||
"results": [
|
"results": [
|
||||||
{
|
{
|
||||||
@ -161,6 +161,97 @@ async fn add_remove_user_provided() {
|
|||||||
"limit": 20,
|
"limit": 20,
|
||||||
"total": 1
|
"total": 1
|
||||||
}
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn user_provide_mismatched_embedding_dimension() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("doggo");
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
|
let documents = json!([
|
||||||
|
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0] }},
|
||||||
|
]);
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
let task = index.wait_task(value.uid()).await;
|
||||||
|
snapshot!(task, @r#"
|
||||||
|
{
|
||||||
|
"uid": "[uid]",
|
||||||
|
"batchUid": "[batch_uid]",
|
||||||
|
"indexUid": "doggo",
|
||||||
|
"status": "failed",
|
||||||
|
"type": "documentAdditionOrUpdate",
|
||||||
|
"canceledBy": null,
|
||||||
|
"details": {
|
||||||
|
"receivedDocuments": 1,
|
||||||
|
"indexedDocuments": 0
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"message": "Index `doggo`: Invalid vector dimensions: expected: `3`, found: `2`.",
|
||||||
|
"code": "invalid_vector_dimensions",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_vector_dimensions"
|
||||||
|
},
|
||||||
|
"duration": "[duration]",
|
||||||
|
"enqueuedAt": "[date]",
|
||||||
|
"startedAt": "[date]",
|
||||||
|
"finishedAt": "[date]"
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
|
||||||
|
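// FIXME: /!\ A dimension mismatch still passes when the total number of provided values is a multiple of `dimensions` (the values are then regrouped into vectors of `dimensions` values each)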
|
||||||
|
let new_document = json!([
|
||||||
|
{"id": 0, "name": "kefir", "_vectors": { "manual": [[0, 0], [1, 1], [2, 2]] }},
|
||||||
|
]);
|
||||||
|
let (response, code) = index.add_documents(new_document, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(response.uid()).await.succeeded();
|
||||||
|
let (documents, _code) = index
|
||||||
|
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||||
|
.await;
|
||||||
|
snapshot!(json_string!(documents), @r###"
|
||||||
|
{
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"name": "kefir",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": {
|
||||||
|
"embeddings": [
|
||||||
|
[
|
||||||
|
0.0,
|
||||||
|
0.0,
|
||||||
|
1.0
|
||||||
|
],
|
||||||
|
[
|
||||||
|
1.0,
|
||||||
|
2.0,
|
||||||
|
2.0
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"regenerate": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"offset": 0,
|
||||||
|
"limit": 20,
|
||||||
|
"total": 1
|
||||||
|
}
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -678,7 +769,7 @@ async fn add_remove_one_vector_4588() {
|
|||||||
let (documents, _code) = index
|
let (documents, _code) = index
|
||||||
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||||
.await;
|
.await;
|
||||||
snapshot!(json_string!(documents), @r###"
|
snapshot!(json_string!(documents), @r#"
|
||||||
{
|
{
|
||||||
"results": [
|
"results": [
|
||||||
{
|
{
|
||||||
@ -696,5 +787,5 @@ async fn add_remove_one_vector_4588() {
|
|||||||
"limit": 20,
|
"limit": 20,
|
||||||
"total": 1
|
"total": 1
|
||||||
}
|
}
|
||||||
"###);
|
"#);
|
||||||
}
|
}
|
||||||
|
@ -13,7 +13,7 @@ use crate::index::IndexEmbeddingConfig;
|
|||||||
use crate::progress::Progress;
|
use crate::progress::Progress;
|
||||||
use crate::update::settings::InnerIndexSettings;
|
use crate::update::settings::InnerIndexSettings;
|
||||||
use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings};
|
use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings};
|
||||||
use crate::{Error, Index, InternalError, Result};
|
use crate::{Error, Index, InternalError, Result, UserError};
|
||||||
|
|
||||||
pub fn write_to_db(
|
pub fn write_to_db(
|
||||||
mut writer_receiver: WriterBbqueueReceiver<'_>,
|
mut writer_receiver: WriterBbqueueReceiver<'_>,
|
||||||
@ -218,7 +218,12 @@ pub fn write_from_bbqueue(
|
|||||||
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
|
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
|
||||||
let mut embeddings = Embeddings::new(*dimensions);
|
let mut embeddings = Embeddings::new(*dimensions);
|
||||||
let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding);
|
let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding);
|
||||||
embeddings.append(all_embeddings.to_vec()).unwrap();
|
if embeddings.append(all_embeddings.to_vec()).is_err() {
|
||||||
|
return Err(Error::UserError(UserError::InvalidVectorDimensions {
|
||||||
|
expected: *dimensions,
|
||||||
|
found: all_embeddings.len(),
|
||||||
|
}));
|
||||||
|
}
|
||||||
writer.del_items(wtxn, *dimensions, docid)?;
|
writer.del_items(wtxn, *dimensions, docid)?;
|
||||||
writer.add_items(wtxn, docid, &embeddings)?;
|
writer.add_items(wtxn, docid, &embeddings)?;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user