From b833be46b9b1076d3dc2b26d7a1e197e55e887d7 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 5 Jun 2024 12:48:44 +0200 Subject: [PATCH] Avoid running proximity when only the exact attributes changes --- .../extract_word_pair_proximity_docids.rs | 9 ++++---- milli/src/update/settings.rs | 22 +++++++++++++++---- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs index 617338f9f..5a9363942 100644 --- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs @@ -26,11 +26,8 @@ pub fn extract_word_pair_proximity_docids( indexer: GrenadParameters, settings_diff: &InnerIndexSettingsDiff, ) -> Result>> { - let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord; - let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord; - // early return if the data shouldn't be deleted nor created. 
- if !any_deletion && !any_addition { + if settings_diff.settings_update_only && !settings_diff.reindex_proximities() { let writer = create_writer( indexer.chunk_compression_type, indexer.chunk_compression_level, @@ -39,8 +36,10 @@ pub fn extract_word_pair_proximity_docids( return writer_into_reader(writer); } - let max_memory = indexer.max_memory_by_thread(); + let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord; + let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord; + let max_memory = indexer.max_memory_by_thread(); let mut word_pair_proximity_docids_sorters: Vec<_> = (1..MAX_DISTANCE) .map(|_| { create_sorter( diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 952b017c6..dc26ac746 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1104,8 +1104,10 @@ pub struct InnerIndexSettingsDiff { // Cache the check to see if all the stop_words, allowed_separators, dictionary, // exact_attributes, proximity_precision are different. pub(crate) cache_reindex_searchable_without_user_defined: bool, - // Cache the check to see if all the user_defined_searchables are different. + // Cache the check to see if the user_defined_searchables are different. pub(crate) cache_user_defined_searchables: bool, + // Cache the check to see if the exact_attributes are different. 
+ pub(crate) cache_exact_attributes: bool, } impl InnerIndexSettingsDiff { @@ -1139,10 +1141,11 @@ impl InnerIndexSettingsDiff { != new_settings.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) || old_settings.allowed_separators != new_settings.allowed_separators || old_settings.dictionary != new_settings.dictionary - || old_settings.exact_attributes != new_settings.exact_attributes || old_settings.proximity_precision != new_settings.proximity_precision }; + let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes; + let cache_user_defined_searchables = old_settings.user_defined_searchable_fields != new_settings.user_defined_searchable_fields; @@ -1155,6 +1158,7 @@ impl InnerIndexSettingsDiff { only_additional_fields, cache_reindex_searchable_without_user_defined, cache_user_defined_searchables, + cache_exact_attributes, } } @@ -1163,11 +1167,21 @@ impl InnerIndexSettingsDiff { } pub fn reindex_searchable(&self) -> bool { - self.cache_reindex_searchable_without_user_defined || self.cache_user_defined_searchables + self.cache_reindex_searchable_without_user_defined + || self.cache_exact_attributes + || self.cache_user_defined_searchables + } + + pub fn reindex_proximities(&self) -> bool { + // a searchable setting other than exact_attributes changed, which can alter word positions + (self.cache_reindex_searchable_without_user_defined || self.cache_user_defined_searchables) + // and the old or the new settings use ByWord, the only precision with pair proximities to delete or add + && (self.old.proximity_precision == ProximityPrecision::ByWord + || self.new.proximity_precision == ProximityPrecision::ByWord) } pub fn reindex_searchable_id(&self, id: FieldId) -> Option { - if self.cache_reindex_searchable_without_user_defined { + if self.cache_reindex_searchable_without_user_defined || self.cache_exact_attributes { Some(DelAddOperation::DeletionAndAddition) } else if let Some(only_additional_fields) = &self.only_additional_fields { let additional_field = self.new.fields_ids_map.name(id).unwrap();