Introduce a new error message for invalid vector dimensions

This commit is contained in:
Kerollmops 2023-06-14 16:34:09 +02:00 committed by Clément Renault
parent 3b560ef7d0
commit a7e0f0de89
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
3 changed files with 23 additions and 1 deletions

View File

@ -217,6 +217,7 @@ InvalidDocumentFields , InvalidRequest , BAD_REQUEST ;
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ; MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ; InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ; InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
InvalidDocumentId , InvalidRequest , BAD_REQUEST ; InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ; InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ; InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
@ -335,6 +336,7 @@ impl ErrorCode for milli::Error {
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort, UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules, UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField, UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
UserError::SortError(_) => Code::InvalidSearchSort, UserError::SortError(_) => Code::InvalidSearchSort,
UserError::InvalidMinTypoWordLenSetting(_, _) => { UserError::InvalidMinTypoWordLenSetting(_, _) => {
Code::InvalidSettingsTypoTolerance Code::InvalidSettingsTypoTolerance

View File

@ -110,9 +110,11 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
}, },
#[error(transparent)] #[error(transparent)]
InvalidGeoField(#[from] GeoError), InvalidGeoField(#[from] GeoError),
#[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
InvalidVectorDimensions { expected: usize, found: usize },
#[error("{0}")] #[error("{0}")]
InvalidFilter(String), InvalidFilter(String),
#[error("Invalid type for filter subexpression: `expected {}, found: {1}`.", .0.join(", "))] #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
InvalidFilterExpression(&'static [&'static str], Value), InvalidFilterExpression(&'static [&'static str], Value),
#[error("Attribute `{}` is not sortable. {}", #[error("Attribute `{}` is not sortable. {}",
.field, .field,

View File

@ -11,11 +11,13 @@ use heed::types::ByteSlice;
use heed::RwTxn; use heed::RwTxn;
use hnsw::Searcher; use hnsw::Searcher;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use space::KnnPoints;
use super::helpers::{ use super::helpers::{
self, merge_ignore_values, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap, self, merge_ignore_values, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap,
}; };
use super::{ClonableMmap, MergeFn}; use super::{ClonableMmap, MergeFn};
use crate::error::UserError;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::update::facet::FacetsUpdate; use crate::update::facet::FacetsUpdate;
use crate::update::index_documents::helpers::as_cloneable_grenad; use crate::update::index_documents::helpers::as_cloneable_grenad;
@ -228,12 +230,28 @@ pub(crate) fn write_typed_chunk_into_index(
let mut hnsw = index.vector_hnsw(wtxn)?.unwrap_or_default(); let mut hnsw = index.vector_hnsw(wtxn)?.unwrap_or_default();
let mut searcher = Searcher::new(); let mut searcher = Searcher::new();
let mut expected_dimensions = match index.vector_id_docid.iter(wtxn)?.next() {
Some(result) => {
let (vector_id, _) = result?;
Some(hnsw.get_point(vector_id.get() as usize).len())
}
None => None,
};
let mut cursor = vector_points.into_cursor()?; let mut cursor = vector_points.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? { while let Some((key, value)) = cursor.move_on_next()? {
// convert the key back to a u32 (4 bytes) // convert the key back to a u32 (4 bytes)
let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap(); let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
// convert the vector back to a Vec<f32> // convert the vector back to a Vec<f32>
let vector: Vec<f32> = pod_collect_to_vec(value); let vector: Vec<f32> = pod_collect_to_vec(value);
// TODO Move this error in the vector extractor
let found = vector.len();
let expected = *expected_dimensions.get_or_insert(found);
if expected != found {
return Err(UserError::InvalidVectorDimensions { expected, found })?;
}
let vector_id = hnsw.insert(vector, &mut searcher) as u32; let vector_id = hnsw.insert(vector, &mut searcher) as u32;
index.vector_id_docid.put(wtxn, &BEU32::new(vector_id), &BEU32::new(docid))?; index.vector_id_docid.put(wtxn, &BEU32::new(vector_id), &BEU32::new(docid))?;
} }