From 580ea2f45078c42c981e37e3f71b9972fcbed0a8 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 2 Sep 2024 11:30:10 +0200 Subject: [PATCH] Pass the fields <-> ids map with metadata to render --- .../extract/extract_vector_points.rs | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index f66c3fd46..e9b83b92c 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -15,14 +15,14 @@ use serde_json::Value; use super::helpers::{create_writer, writer_into_reader, GrenadParameters}; use crate::error::FaultSource; use crate::index::IndexEmbeddingConfig; -use crate::prompt::Prompt; +use crate::prompt::{FieldsIdsMapWithMetadata, Prompt}; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::settings::InnerIndexSettingsDiff; use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution}; use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME}; use crate::vector::settings::{EmbedderAction, ReindexAction}; use crate::vector::{Embedder, Embeddings}; -use crate::{try_split_array_at, DocumentId, FieldId, FieldsIdsMap, Result, ThreadPoolNoAbort}; +use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort}; /// The length of the elements that are always in the buffer when inserting new values. const TRUNCATE_SIZE: usize = size_of::(); @@ -189,7 +189,13 @@ pub fn extract_vector_points( let reindex_vectors = settings_diff.reindex_vectors(); let old_fields_ids_map = &settings_diff.old.fields_ids_map; + let old_fields_ids_map = + FieldsIdsMapWithMetadata::new(old_fields_ids_map, &settings_diff.old.searchable_fields_ids); + let new_fields_ids_map = &settings_diff.new.fields_ids_map; + let new_fields_ids_map = + FieldsIdsMapWithMetadata::new(new_fields_ids_map, &settings_diff.new.searchable_fields_ids); + // the vector field id may have changed let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME); @@ -376,7 +382,7 @@ pub fn extract_vector_points( ); continue; } - regenerate_prompt(obkv, prompt, new_fields_ids_map)? + regenerate_prompt(obkv, prompt, &new_fields_ids_map)? } }, // prompt regeneration is only triggered for existing embedders @@ -393,7 +399,7 @@ pub fn extract_vector_points( regenerate_if_prompt_changed( obkv, (old_prompt, prompt), - (old_fields_ids_map, new_fields_ids_map), + (&old_fields_ids_map, &new_fields_ids_map), )? } else { // we can simply ignore user provided vectors as they are not regenerated and are @@ -409,7 +415,7 @@ pub fn extract_vector_points( prompt, (add_to_user_provided, remove_from_user_provided), (old, new), - (old_fields_ids_map, new_fields_ids_map), + (&old_fields_ids_map, &new_fields_ids_map), document_id, embedder_name, embedder_is_manual, @@ -479,7 +485,10 @@ fn extract_vector_document_diff( prompt: &Prompt, (add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap), (old, new): (VectorState, VectorState), - (old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap), + (old_fields_ids_map, new_fields_ids_map): ( + &FieldsIdsMapWithMetadata, + &FieldsIdsMapWithMetadata, + ), document_id: impl Fn() -> Value, embedder_name: &str, embedder_is_manual: bool, @@ -599,7 +608,10 @@ fn extract_vector_document_diff( fn regenerate_if_prompt_changed( obkv: obkv::KvReader<'_, FieldId>, (old_prompt, new_prompt): (&Prompt, &Prompt), - (old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap), + (old_fields_ids_map, new_fields_ids_map): ( + &FieldsIdsMapWithMetadata, + &FieldsIdsMapWithMetadata, + ), ) -> Result { let old_prompt = old_prompt.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or(Default::default()); @@ -614,7 +626,7 @@ fn regenerate_if_prompt_changed( fn regenerate_prompt( obkv: obkv::KvReader<'_, FieldId>, prompt: &Prompt, - new_fields_ids_map: &FieldsIdsMap, + new_fields_ids_map: &FieldsIdsMapWithMetadata, ) -> Result { let prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;