Avoid running the proximity extraction when only the exact attributes change

This commit is contained in:
ManyTheFish 2024-06-05 12:48:44 +02:00
parent 0a4118329e
commit b833be46b9
2 changed files with 22 additions and 9 deletions

View File

@ -26,11 +26,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters, indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff, settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> { ) -> Result<grenad::Reader<BufReader<File>>> {
let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord;
let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord;
// early return if the data shouldn't be deleted nor created. // early return if the data shouldn't be deleted nor created.
if !any_deletion && !any_addition { if settings_diff.settings_update_only && !settings_diff.reindex_proximities() {
let writer = create_writer( let writer = create_writer(
indexer.chunk_compression_type, indexer.chunk_compression_type,
indexer.chunk_compression_level, indexer.chunk_compression_level,
@ -39,8 +36,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
return writer_into_reader(writer); return writer_into_reader(writer);
} }
let max_memory = indexer.max_memory_by_thread(); let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord;
let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord;
let max_memory = indexer.max_memory_by_thread();
let mut word_pair_proximity_docids_sorters: Vec<_> = (1..MAX_DISTANCE) let mut word_pair_proximity_docids_sorters: Vec<_> = (1..MAX_DISTANCE)
.map(|_| { .map(|_| {
create_sorter( create_sorter(

View File

@ -1104,8 +1104,10 @@ pub struct InnerIndexSettingsDiff {
// Cache the check to see if all the stop_words, allowed_separators, dictionary, // Cache the check to see if all the stop_words, allowed_separators, dictionary,
// exact_attributes, proximity_precision are different. // exact_attributes, proximity_precision are different.
pub(crate) cache_reindex_searchable_without_user_defined: bool, pub(crate) cache_reindex_searchable_without_user_defined: bool,
// Cache the check to see if all the user_defined_searchables are different. // Cache the check to see if the user_defined_searchables are different.
pub(crate) cache_user_defined_searchables: bool, pub(crate) cache_user_defined_searchables: bool,
// Cache the check to see if the exact_attributes are different.
pub(crate) cache_exact_attributes: bool,
} }
impl InnerIndexSettingsDiff { impl InnerIndexSettingsDiff {
@ -1139,10 +1141,11 @@ impl InnerIndexSettingsDiff {
!= new_settings.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) != new_settings.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
|| old_settings.allowed_separators != new_settings.allowed_separators || old_settings.allowed_separators != new_settings.allowed_separators
|| old_settings.dictionary != new_settings.dictionary || old_settings.dictionary != new_settings.dictionary
|| old_settings.exact_attributes != new_settings.exact_attributes
|| old_settings.proximity_precision != new_settings.proximity_precision || old_settings.proximity_precision != new_settings.proximity_precision
}; };
let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;
let cache_user_defined_searchables = old_settings.user_defined_searchable_fields let cache_user_defined_searchables = old_settings.user_defined_searchable_fields
!= new_settings.user_defined_searchable_fields; != new_settings.user_defined_searchable_fields;
@ -1155,6 +1158,7 @@ impl InnerIndexSettingsDiff {
only_additional_fields, only_additional_fields,
cache_reindex_searchable_without_user_defined, cache_reindex_searchable_without_user_defined,
cache_user_defined_searchables, cache_user_defined_searchables,
cache_exact_attributes,
} }
} }
@ -1163,11 +1167,21 @@ impl InnerIndexSettingsDiff {
} }
pub fn reindex_searchable(&self) -> bool { pub fn reindex_searchable(&self) -> bool {
self.cache_reindex_searchable_without_user_defined || self.cache_user_defined_searchables self.cache_reindex_searchable_without_user_defined
|| self.cache_exact_attributes
|| self.cache_user_defined_searchables
}
pub fn reindex_proximities(&self) -> bool {
// if any searchable settings force the reindexing
(self.cache_reindex_searchable_without_user_defined || self.cache_user_defined_searchables)
// and if any settings needs the proximity database created
&& (self.old.proximity_precision == ProximityPrecision::ByAttribute
|| self.old.proximity_precision == ProximityPrecision::ByAttribute)
} }
pub fn reindex_searchable_id(&self, id: FieldId) -> Option<DelAddOperation> { pub fn reindex_searchable_id(&self, id: FieldId) -> Option<DelAddOperation> {
if self.cache_reindex_searchable_without_user_defined { if self.cache_reindex_searchable_without_user_defined || self.cache_exact_attributes {
Some(DelAddOperation::DeletionAndAddition) Some(DelAddOperation::DeletionAndAddition)
} else if let Some(only_additional_fields) = &self.only_additional_fields { } else if let Some(only_additional_fields) = &self.only_additional_fields {
let additional_field = self.new.fields_ids_map.name(id).unwrap(); let additional_field = self.new.fields_ids_map.name(id).unwrap();