Pass the fields <-> ids map with metadata to render

This commit is contained in:
Louis Dureuil 2024-09-02 11:30:10 +02:00
parent 915cf4bae5
commit 580ea2f450
No known key found for this signature in database

View File

@@ -15,14 +15,14 @@ use serde_json::Value;
use super::helpers::{create_writer, writer_into_reader, GrenadParameters}; use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::error::FaultSource; use crate::error::FaultSource;
use crate::index::IndexEmbeddingConfig; use crate::index::IndexEmbeddingConfig;
use crate::prompt::Prompt; use crate::prompt::{FieldsIdsMapWithMetadata, Prompt};
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::settings::InnerIndexSettingsDiff; use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution}; use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME}; use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME};
use crate::vector::settings::{EmbedderAction, ReindexAction}; use crate::vector::settings::{EmbedderAction, ReindexAction};
use crate::vector::{Embedder, Embeddings}; use crate::vector::{Embedder, Embeddings};
use crate::{try_split_array_at, DocumentId, FieldId, FieldsIdsMap, Result, ThreadPoolNoAbort}; use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort};
/// The length of the elements that are always in the buffer when inserting new values. /// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<DocumentId>(); const TRUNCATE_SIZE: usize = size_of::<DocumentId>();
@@ -189,7 +189,13 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
let reindex_vectors = settings_diff.reindex_vectors(); let reindex_vectors = settings_diff.reindex_vectors();
let old_fields_ids_map = &settings_diff.old.fields_ids_map; let old_fields_ids_map = &settings_diff.old.fields_ids_map;
let old_fields_ids_map =
FieldsIdsMapWithMetadata::new(old_fields_ids_map, &settings_diff.old.searchable_fields_ids);
let new_fields_ids_map = &settings_diff.new.fields_ids_map; let new_fields_ids_map = &settings_diff.new.fields_ids_map;
let new_fields_ids_map =
FieldsIdsMapWithMetadata::new(new_fields_ids_map, &settings_diff.new.searchable_fields_ids);
// the vector field id may have changed // the vector field id may have changed
let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME); let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME);
@@ -376,7 +382,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
); );
continue; continue;
} }
regenerate_prompt(obkv, prompt, new_fields_ids_map)? regenerate_prompt(obkv, prompt, &new_fields_ids_map)?
} }
}, },
// prompt regeneration is only triggered for existing embedders // prompt regeneration is only triggered for existing embedders
@@ -393,7 +399,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
regenerate_if_prompt_changed( regenerate_if_prompt_changed(
obkv, obkv,
(old_prompt, prompt), (old_prompt, prompt),
(old_fields_ids_map, new_fields_ids_map), (&old_fields_ids_map, &new_fields_ids_map),
)? )?
} else { } else {
// we can simply ignore user provided vectors as they are not regenerated and are // we can simply ignore user provided vectors as they are not regenerated and are
@@ -409,7 +415,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
prompt, prompt,
(add_to_user_provided, remove_from_user_provided), (add_to_user_provided, remove_from_user_provided),
(old, new), (old, new),
(old_fields_ids_map, new_fields_ids_map), (&old_fields_ids_map, &new_fields_ids_map),
document_id, document_id,
embedder_name, embedder_name,
embedder_is_manual, embedder_is_manual,
@@ -479,7 +485,10 @@ fn extract_vector_document_diff(
prompt: &Prompt, prompt: &Prompt,
(add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap), (add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap),
(old, new): (VectorState, VectorState), (old, new): (VectorState, VectorState),
(old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap), (old_fields_ids_map, new_fields_ids_map): (
&FieldsIdsMapWithMetadata,
&FieldsIdsMapWithMetadata,
),
document_id: impl Fn() -> Value, document_id: impl Fn() -> Value,
embedder_name: &str, embedder_name: &str,
embedder_is_manual: bool, embedder_is_manual: bool,
@@ -599,7 +608,10 @@ fn extract_vector_document_diff(
fn regenerate_if_prompt_changed( fn regenerate_if_prompt_changed(
obkv: obkv::KvReader<'_, FieldId>, obkv: obkv::KvReader<'_, FieldId>,
(old_prompt, new_prompt): (&Prompt, &Prompt), (old_prompt, new_prompt): (&Prompt, &Prompt),
(old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap), (old_fields_ids_map, new_fields_ids_map): (
&FieldsIdsMapWithMetadata,
&FieldsIdsMapWithMetadata,
),
) -> Result<VectorStateDelta> { ) -> Result<VectorStateDelta> {
let old_prompt = let old_prompt =
old_prompt.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or(Default::default()); old_prompt.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or(Default::default());
@@ -614,7 +626,7 @@ fn regenerate_if_prompt_changed(
fn regenerate_prompt( fn regenerate_prompt(
obkv: obkv::KvReader<'_, FieldId>, obkv: obkv::KvReader<'_, FieldId>,
prompt: &Prompt, prompt: &Prompt,
new_fields_ids_map: &FieldsIdsMap, new_fields_ids_map: &FieldsIdsMapWithMetadata,
) -> Result<VectorStateDelta> { ) -> Result<VectorStateDelta> {
let prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?; let prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;