From 3a78e988dad814f0b781e568d349ecceb20be0a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 30 May 2024 11:17:03 +0200 Subject: [PATCH] Reduce the number of complex calls to settings diff functions --- milli/src/update/index_documents/transform.rs | 14 ++-- .../src/update/index_documents/typed_chunk.rs | 2 +- milli/src/update/settings.rs | 69 ++++++++++++------- 3 files changed, 53 insertions(+), 32 deletions(-) diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 34e40b7f6..59bab36e8 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -808,13 +808,15 @@ impl<'a, 'i> Transform<'a, 'i> { let mut new_inner_settings = old_inner_settings.clone(); new_inner_settings.fields_ids_map = fields_ids_map; - let settings_diff = InnerIndexSettingsDiff { - old: old_inner_settings, - new: new_inner_settings, + let embedding_configs_updated = false; + let settings_update_only = false; + let settings_diff = InnerIndexSettingsDiff::new( + old_inner_settings, + new_inner_settings, primary_key_id, - embedding_configs_updated: false, - settings_update_only: false, - }; + embedding_configs_updated, + settings_update_only, + ); Ok(TransformOutput { primary_key, diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 4021dea08..2fbe91685 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -489,7 +489,7 @@ pub(crate) fn write_typed_chunk_into_index( } let merger = builder.build(); - if settings_diff.only_additional_fields().is_some() { + if settings_diff.only_additional_fields.is_some() { write_proximity_entries_into_database_additional_searchables( merger, &index.word_pair_proximity_docids, diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index b401adff9..84eccf8e6 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1073,13 +1073,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { .index .primary_key(self.wtxn)? .and_then(|name| new_inner_settings.fields_ids_map.id(name)); - let inner_settings_diff = InnerIndexSettingsDiff { - old: old_inner_settings, - new: new_inner_settings, + let settings_update_only = true; + let inner_settings_diff = InnerIndexSettingsDiff::new( + old_inner_settings, + new_inner_settings, primary_key_id, embedding_configs_updated, - settings_update_only: true, - }; + settings_update_only, + ); if inner_settings_diff.any_reindexing_needed() { self.reindex(&progress_callback, &should_abort, inner_settings_diff)?; @@ -1096,9 +1097,46 @@ pub struct InnerIndexSettingsDiff { // TODO: compare directly the embedders. pub(crate) embedding_configs_updated: bool, pub(crate) settings_update_only: bool, + /// The set of only the additional searchable fields. + /// If any other searchable field has been modified, is set to None. + pub(crate) only_additional_fields: Option>, } impl InnerIndexSettingsDiff { + pub(crate) fn new( + old_settings: InnerIndexSettings, + new_settings: InnerIndexSettings, + primary_key_id: Option, + embedding_configs_updated: bool, + settings_update_only: bool, + ) -> Self { + let only_additional_fields = match ( + &old_settings.user_defined_searchable_fields, + &new_settings.user_defined_searchable_fields, + ) { + (None, None) | (Some(_), None) | (None, Some(_)) => None, // None means * + (Some(old), Some(new)) => { + let old: HashSet<_> = old.iter().cloned().collect(); + let new: HashSet<_> = new.iter().cloned().collect(); + if old.difference(&new).next().is_none() { + // if no field has been removed return only the additional ones + Some(&new - &old) + } else { + None + } + } + }; + + InnerIndexSettingsDiff { + old: old_settings, + new: new_settings, + primary_key_id, + embedding_configs_updated, + settings_update_only, + only_additional_fields, + } + } + pub fn any_reindexing_needed(&self) -> bool { self.reindex_searchable() || self.reindex_facets() || self.reindex_vectors() } @@ -1123,7 +1161,7 @@ impl InnerIndexSettingsDiff { || self.old.proximity_precision != self.new.proximity_precision { Some(DelAddOperation::DeletionAndAddition) - } else if let Some(only_additional_fields) = self.only_additional_fields() { + } else if let Some(only_additional_fields) = &self.only_additional_fields { let additional_field = self.new.fields_ids_map.name(id).unwrap(); if only_additional_fields.contains(additional_field) { Some(DelAddOperation::Addition) @@ -1138,25 +1176,6 @@ impl InnerIndexSettingsDiff { } } - /// Returns only the additional searchable fields. - /// If any other searchable field has been modified, returns None. - pub fn only_additional_fields(&self) -> Option> { - match (&self.old.user_defined_searchable_fields, &self.new.user_defined_searchable_fields) { - (None, None) | (Some(_), None) | (None, Some(_)) => None, // None means * - (Some(old), Some(new)) => { - let old: HashSet<_> = old.iter().cloned().collect(); - let new: HashSet<_> = new.iter().cloned().collect(); - if old.difference(&new).next().is_none() { - // if no field has been removed - // return only the additional ones - Some(&new - &old) - } else { - None - } - } - } - } - pub fn reindex_facets(&self) -> bool { let existing_fields = &self.new.existing_fields; if existing_fields.iter().any(|field| field.contains('.')) {