Normalize the vectors during indexing and search

This commit is contained in:
Kerollmops 2023-06-20 11:45:29 +02:00 committed by Clément Renault
parent 321ec5f3fa
commit ab9f2269aa
No known key found for this signature in database
GPG key ID: 92ADA4E935E71FA4
4 changed files with 21 additions and 5 deletions

View file

@ -283,7 +283,7 @@ fn send_and_extract_flattened_documents_data(
faceted_fields: &HashSet<FieldId>,
primary_key_id: FieldId,
geo_fields_ids: Option<(FieldId, FieldId)>,
vector_field_id: Option<FieldId>,
vectors_field_id: Option<FieldId>,
stop_words: &Option<fst::Set<&[u8]>>,
max_positions_per_attributes: Option<u32>,
) -> Result<(
@ -312,11 +312,11 @@ fn send_and_extract_flattened_documents_data(
});
}
if let Some(vector_field_id) = vector_field_id {
if let Some(vectors_field_id) = vectors_field_id {
let documents_chunk_cloned = flattened_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
rayon::spawn(move || {
let result = extract_vector_points(documents_chunk_cloned, indexer, vector_field_id);
let result = extract_vector_points(documents_chunk_cloned, indexer, vectors_field_id);
let _ = match result {
Ok(vector_points) => {
lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))

View file

@ -19,6 +19,7 @@ use super::helpers::{
use super::{ClonableMmap, MergeFn};
use crate::error::UserError;
use crate::facet::FacetType;
use crate::normalize_vector;
use crate::update::facet::FacetsUpdate;
use crate::update::index_documents::helpers::{as_cloneable_grenad, try_split_array_at};
use crate::{lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result, BEU32};
@ -253,6 +254,7 @@ pub(crate) fn write_typed_chunk_into_index(
return Err(UserError::InvalidVectorDimensions { expected, found })?;
}
let vector = normalize_vector(vector);
let vector_id = hnsw.insert(vector, &mut searcher) as u32;
index.vector_id_docid.put(wtxn, &BEU32::new(vector_id), &BEU32::new(docid))?;
}