Reduce the number of complex calls to settings diff functions

This commit is contained in:
Clément Renault 2024-05-30 11:17:03 +02:00
parent d9e5074189
commit 3a78e988da
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
3 changed files with 53 additions and 32 deletions

View File

@ -808,13 +808,15 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut new_inner_settings = old_inner_settings.clone();
new_inner_settings.fields_ids_map = fields_ids_map;
let settings_diff = InnerIndexSettingsDiff {
old: old_inner_settings,
new: new_inner_settings,
let embedding_configs_updated = false;
let settings_update_only = false;
let settings_diff = InnerIndexSettingsDiff::new(
old_inner_settings,
new_inner_settings,
primary_key_id,
embedding_configs_updated: false,
settings_update_only: false,
};
embedding_configs_updated,
settings_update_only,
);
Ok(TransformOutput {
primary_key,

View File

@ -489,7 +489,7 @@ pub(crate) fn write_typed_chunk_into_index(
}
let merger = builder.build();
if settings_diff.only_additional_fields().is_some() {
if settings_diff.only_additional_fields.is_some() {
write_proximity_entries_into_database_additional_searchables(
merger,
&index.word_pair_proximity_docids,

View File

@ -1073,13 +1073,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
.index
.primary_key(self.wtxn)?
.and_then(|name| new_inner_settings.fields_ids_map.id(name));
let inner_settings_diff = InnerIndexSettingsDiff {
old: old_inner_settings,
new: new_inner_settings,
let settings_update_only = true;
let inner_settings_diff = InnerIndexSettingsDiff::new(
old_inner_settings,
new_inner_settings,
primary_key_id,
embedding_configs_updated,
settings_update_only: true,
};
settings_update_only,
);
if inner_settings_diff.any_reindexing_needed() {
self.reindex(&progress_callback, &should_abort, inner_settings_diff)?;
@ -1096,9 +1097,46 @@ pub struct InnerIndexSettingsDiff {
// TODO: compare directly the embedders.
pub(crate) embedding_configs_updated: bool,
pub(crate) settings_update_only: bool,
/// The set of only the additional searchable fields.
/// If any other searchable field has been modified, is set to None.
pub(crate) only_additional_fields: Option<HashSet<String>>,
}
impl InnerIndexSettingsDiff {
pub(crate) fn new(
old_settings: InnerIndexSettings,
new_settings: InnerIndexSettings,
primary_key_id: Option<FieldId>,
embedding_configs_updated: bool,
settings_update_only: bool,
) -> Self {
let only_additional_fields = match (
&old_settings.user_defined_searchable_fields,
&new_settings.user_defined_searchable_fields,
) {
(None, None) | (Some(_), None) | (None, Some(_)) => None, // None means *
(Some(old), Some(new)) => {
let old: HashSet<_> = old.iter().cloned().collect();
let new: HashSet<_> = new.iter().cloned().collect();
if old.difference(&new).next().is_none() {
// if no field has been removed return only the additional ones
Some(&new - &old)
} else {
None
}
}
};
InnerIndexSettingsDiff {
old: old_settings,
new: new_settings,
primary_key_id,
embedding_configs_updated,
settings_update_only,
only_additional_fields,
}
}
pub fn any_reindexing_needed(&self) -> bool {
self.reindex_searchable() || self.reindex_facets() || self.reindex_vectors()
}
@ -1123,7 +1161,7 @@ impl InnerIndexSettingsDiff {
|| self.old.proximity_precision != self.new.proximity_precision
{
Some(DelAddOperation::DeletionAndAddition)
} else if let Some(only_additional_fields) = self.only_additional_fields() {
} else if let Some(only_additional_fields) = &self.only_additional_fields {
let additional_field = self.new.fields_ids_map.name(id).unwrap();
if only_additional_fields.contains(additional_field) {
Some(DelAddOperation::Addition)
@ -1138,25 +1176,6 @@ impl InnerIndexSettingsDiff {
}
}
/// Returns only the additional searchable fields.
/// If any other searchable field has been modified, returns None.
pub fn only_additional_fields(&self) -> Option<HashSet<String>> {
match (&self.old.user_defined_searchable_fields, &self.new.user_defined_searchable_fields) {
(None, None) | (Some(_), None) | (None, Some(_)) => None, // None means *
(Some(old), Some(new)) => {
let old: HashSet<_> = old.iter().cloned().collect();
let new: HashSet<_> = new.iter().cloned().collect();
if old.difference(&new).next().is_none() {
// if no field has been removed
// return only the additional ones
Some(&new - &old)
} else {
None
}
}
}
}
pub fn reindex_facets(&self) -> bool {
let existing_fields = &self.new.existing_fields;
if existing_fields.iter().any(|field| field.contains('.')) {