From 0c6e4b2f0098cd4621ceda79402e5bc634a80c59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 28 May 2024 14:53:45 +0200 Subject: [PATCH] Introducing a new into_del_add_obkv_conditional_operation function --- milli/src/update/del_add.rs | 15 ++++++ milli/src/update/index_documents/transform.rs | 47 +++++++++++++++---- 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/milli/src/update/del_add.rs b/milli/src/update/del_add.rs index 0288858ed..cb5e448f1 100644 --- a/milli/src/update/del_add.rs +++ b/milli/src/update/del_add.rs @@ -40,11 +40,26 @@ pub fn into_del_add_obkv( operation: DelAddOperation, buffer: &mut Vec, ) -> Result<(), std::io::Error> { + into_del_add_obkv_conditional_operation(reader, buffer, |_| operation) +} + +/// Akin to the [into_del_add_obkv] function but lets you +/// conditionally define the `DelAdd` variant based on the obkv key. +pub fn into_del_add_obkv_conditional_operation( + reader: obkv::KvReader, + buffer: &mut Vec, + operation: F, +) -> std::io::Result<()> +where + K: obkv::Key + PartialOrd, + F: Fn(K) -> DelAddOperation, +{ let mut writer = obkv::KvWriter::new(buffer); let mut value_buffer = Vec::new(); for (key, value) in reader.iter() { value_buffer.clear(); let mut value_writer = KvWriterDelAdd::new(&mut value_buffer); + let operation = operation(key); if matches!(operation, DelAddOperation::Deletion | DelAddOperation::DeletionAndAddition) { value_writer.insert(DelAdd::Deletion, value)?; } diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 41a0a55cf..dc6642b8a 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -20,7 +20,10 @@ use super::{IndexDocumentsMethod, IndexerConfig}; use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader}; use crate::error::{Error, InternalError, UserError}; use crate::index::{db_name, main_key}; -use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd}; +use crate::update::del_add::{ + into_del_add_obkv, into_del_add_obkv_conditional_operation, DelAdd, DelAddOperation, + KvReaderDelAdd, +}; use crate::update::index_documents::GrenadParameters; use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; @@ -841,13 +844,28 @@ impl<'a, 'i> Transform<'a, 'i> { let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) }; // If only the `searchableAttributes` has been changed, keep only the searchable fields. + // However, if only new searchable attributes are added, this function will + // return false has fields do not need to be reindexed. let must_reindex_searchables = settings_diff.reindex_searchable(); - let necessary_searchable_field = |id: FieldId| -> bool { - must_reindex_searchables + let must_index_only_additional_searchables = &settings_diff.only_additional_fields(); + let necessary_searchable_field_to_reindex = move |id: FieldId| -> bool { + must_index_only_additional_searchables.is_none() + && must_reindex_searchables && (settings_diff.old.searchable_fields_ids.contains(&id) || settings_diff.new.searchable_fields_ids.contains(&id)) }; + // If only new `searchableAttributes` are present, keep only those ones. + let additional_searchable_field_only = move |id: FieldId| -> bool { + match must_index_only_additional_searchables { + Some(additional_fields) => { + let additional_field = settings_diff.new.fields_ids_map.name(id).unwrap(); + additional_fields.contains(additional_field) + } + None => false, + } + }; + // If only a faceted field has been added, keep only this field. let must_reindex_facets = settings_diff.reindex_facets(); let necessary_faceted_field = |id: FieldId| -> bool { @@ -862,14 +880,21 @@ impl<'a, 'i> Transform<'a, 'i> { // we need the fields for the prompt/templating. let reindex_vectors = settings_diff.reindex_vectors(); + // The set of additional searchable fields only, + // the only purpose of these fields is to be indexed from scratch. + let mut additional_searchables_only = HashSet::new(); + let mut obkv_writer = KvWriter::<_, FieldId>::memory(); for (id, val) in old_obkv.iter() { if is_primary_key(id) - || necessary_searchable_field(id) + || necessary_searchable_field_to_reindex(id) || necessary_faceted_field(id) || reindex_vectors { obkv_writer.insert(id, val)?; + } else if additional_searchable_field_only(id) { + additional_searchables_only.insert(id); + obkv_writer.insert(id, val)?; } } let data = obkv_writer.into_inner()?; @@ -887,11 +912,15 @@ impl<'a, 'i> Transform<'a, 'i> { let flattened = flattened.as_deref().map_or(obkv, KvReader::new); flattened_obkv_buffer.clear(); - into_del_add_obkv( - flattened, - DelAddOperation::DeletionAndAddition, - flattened_obkv_buffer, - )?; + into_del_add_obkv_conditional_operation(flattened, flattened_obkv_buffer, |id| { + // If the field is only required because it is an additional + // searchable field only define it as an DelAdd::Addition only. + if additional_searchables_only.contains(&id) { + DelAddOperation::Addition + } else { + DelAddOperation::DeletionAndAddition + } + })?; } Ok(())