From af9f96e2af861b83f798d0654bd9bd27e9b700dd Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 28 Oct 2024 14:22:45 +0100 Subject: [PATCH] Update older embedding --- .../extract/extract_vector_points.rs | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index 03843fcd8..7b5bf3f40 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -21,7 +21,7 @@ use crate::update::settings::InnerIndexSettingsDiff; use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution}; use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME}; use crate::vector::settings::ReindexAction; -use crate::vector::{Embedder, Embeddings}; +use crate::vector::{Embedder, Embedding}; use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort}; /// The length of the elements that are always in the buffer when inserting new values. @@ -536,9 +536,11 @@ fn extract_vector_document_diff( } // Don't give up if the old prompt was failing let old_prompt = Some(&prompt).map(|p| { - p.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or_default() + p.render_kvdeladd(obkv, DelAdd::Deletion, old_fields_ids_map) + .unwrap_or_default() }); - let new_prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?; + let new_prompt = + prompt.render_kvdeladd(obkv, DelAdd::Addition, new_fields_ids_map)?; if old_prompt.as_ref() != Some(&new_prompt) { let old_prompt = old_prompt.unwrap_or_default(); tracing::trace!( @@ -570,7 +572,7 @@ fn extract_vector_document_diff( return Ok(VectorStateDelta::NoChange); } // becomes autogenerated - VectorStateDelta::NowGenerated(prompt.render( + VectorStateDelta::NowGenerated(prompt.render_kvdeladd( obkv, DelAdd::Addition, new_fields_ids_map, @@ -613,9 +615,10 @@ fn regenerate_if_prompt_changed( &FieldsIdsMapWithMetadata, ), ) -> Result { - let old_prompt = - old_prompt.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or(Default::default()); - let new_prompt = new_prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?; + let old_prompt = old_prompt + .render_kvdeladd(obkv, DelAdd::Deletion, old_fields_ids_map) + .unwrap_or(Default::default()); + let new_prompt = new_prompt.render_kvdeladd(obkv, DelAdd::Addition, new_fields_ids_map)?; if new_prompt == old_prompt { return Ok(VectorStateDelta::NoChange); @@ -628,7 +631,7 @@ fn regenerate_prompt( prompt: &Prompt, new_fields_ids_map: &FieldsIdsMapWithMetadata, ) -> Result { - let prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?; + let prompt = prompt.render_kvdeladd(obkv, DelAdd::Addition, new_fields_ids_map)?; Ok(VectorStateDelta::NowGenerated(prompt)) } @@ -738,7 +741,7 @@ pub fn extract_embeddings( .flat_map(|docids| docids.iter()) .zip(chunked_embeds.iter().flat_map(|embeds| embeds.iter())) { - state_writer.insert(docid.to_be_bytes(), cast_slice(embeddings.as_inner()))?; + state_writer.insert(docid.to_be_bytes(), cast_slice(embeddings))?; } chunks_ids.clear(); } @@ -759,7 +762,7 @@ pub fn extract_embeddings( .flat_map(|docids| docids.iter()) .zip(chunked_embeds.iter().flat_map(|embeds| embeds.iter())) { - state_writer.insert(docid.to_be_bytes(), cast_slice(embeddings.as_inner()))?; + state_writer.insert(docid.to_be_bytes(), cast_slice(embeddings))?; } } @@ -775,7 +778,7 @@ pub fn extract_embeddings( if let Some(embeds) = embeds.first() { for (docid, embeddings) in current_chunk_ids.iter().zip(embeds.iter()) { - state_writer.insert(docid.to_be_bytes(), cast_slice(embeddings.as_inner()))?; + state_writer.insert(docid.to_be_bytes(), cast_slice(embeddings))?; } } } @@ -790,7 +793,7 @@ fn embed_chunks( possible_embedding_mistakes: &PossibleEmbeddingMistakes, unused_vectors_distribution: &UnusedVectorsDistribution, request_threads: &ThreadPoolNoAbort, -) -> Result>>> { +) -> Result>> { match embedder.embed_chunks(text_chunks, request_threads) { Ok(chunks) => Ok(chunks), Err(error) => {