From d9e50741896869631206555e5727b0bd61824c9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 29 May 2024 17:46:28 +0200 Subject: [PATCH] Introduce a new way to determine the operations to perform on the fields --- milli/src/update/index_documents/transform.rs | 47 +++---------------- milli/src/update/settings.rs | 28 ++++++++++- 2 files changed, 34 insertions(+), 41 deletions(-) diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index dc6642b8a..34e40b7f6 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -843,29 +843,6 @@ impl<'a, 'i> Transform<'a, 'i> { // Always keep the primary key. let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) }; - // If only the `searchableAttributes` has been changed, keep only the searchable fields. - // However, if only new searchable attributes are added, this function will - // return false has fields do not need to be reindexed. - let must_reindex_searchables = settings_diff.reindex_searchable(); - let must_index_only_additional_searchables = &settings_diff.only_additional_fields(); - let necessary_searchable_field_to_reindex = move |id: FieldId| -> bool { - must_index_only_additional_searchables.is_none() - && must_reindex_searchables - && (settings_diff.old.searchable_fields_ids.contains(&id) - || settings_diff.new.searchable_fields_ids.contains(&id)) - }; - - // If only new `searchableAttributes` are present, keep only those ones. - let additional_searchable_field_only = move |id: FieldId| -> bool { - match must_index_only_additional_searchables { - Some(additional_fields) => { - let additional_field = settings_diff.new.fields_ids_map.name(id).unwrap(); - additional_fields.contains(additional_field) - } - None => false, - } - }; - // If only a faceted field has been added, keep only this field. let must_reindex_facets = settings_diff.reindex_facets(); let necessary_faceted_field = |id: FieldId| -> bool { @@ -880,20 +857,16 @@ impl<'a, 'i> Transform<'a, 'i> { // we need the fields for the prompt/templating. let reindex_vectors = settings_diff.reindex_vectors(); - // The set of additional searchable fields only, - // the only purpose of these fields is to be indexed from scratch. - let mut additional_searchables_only = HashSet::new(); + // The operations that we must perform on the different fields. + let mut operations = HashMap::new(); let mut obkv_writer = KvWriter::<_, FieldId>::memory(); for (id, val) in old_obkv.iter() { - if is_primary_key(id) - || necessary_searchable_field_to_reindex(id) - || necessary_faceted_field(id) - || reindex_vectors - { + if is_primary_key(id) || necessary_faceted_field(id) || reindex_vectors { + operations.insert(id, DelAddOperation::DeletionAndAddition); obkv_writer.insert(id, val)?; - } else if additional_searchable_field_only(id) { - additional_searchables_only.insert(id); + } else if let Some(operation) = settings_diff.reindex_searchable_id(id) { + operations.insert(id, operation); obkv_writer.insert(id, val)?; } } @@ -913,13 +886,7 @@ impl<'a, 'i> Transform<'a, 'i> { flattened_obkv_buffer.clear(); into_del_add_obkv_conditional_operation(flattened, flattened_obkv_buffer, |id| { - // If the field is only required because it is an additional - // searchable field only define it as an DelAdd::Addition only. - if additional_searchables_only.contains(&id) { - DelAddOperation::Addition - } else { - DelAddOperation::DeletionAndAddition - } + operations.get(&id).copied().unwrap_or(DelAddOperation::DeletionAndAddition) })?; } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index af37a205c..b401adff9 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -9,6 +9,7 @@ use itertools::{EitherOrBoth, Itertools}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use time::OffsetDateTime; +use super::del_add::DelAddOperation; use super::index_documents::{IndexDocumentsConfig, Transform}; use super::IndexerConfig; use crate::criterion::Criterion; @@ -1112,6 +1113,31 @@ impl InnerIndexSettingsDiff { || self.old.proximity_precision != self.new.proximity_precision } + pub fn reindex_searchable_id(&self, id: FieldId) -> Option { + if self.old.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) + != self.new.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) + || self.old.allowed_separators != self.new.allowed_separators + || self.old.dictionary != self.new.dictionary + || self.old.exact_attributes != self.new.exact_attributes + // Here we can be much more optimal by just deleting the proximity database + || self.old.proximity_precision != self.new.proximity_precision + { + Some(DelAddOperation::DeletionAndAddition) + } else if let Some(only_additional_fields) = self.only_additional_fields() { + let additional_field = self.new.fields_ids_map.name(id).unwrap(); + if only_additional_fields.contains(additional_field) { + Some(DelAddOperation::Addition) + } else { + None + } + } else if self.old.user_defined_searchable_fields != self.new.user_defined_searchable_fields + { + Some(DelAddOperation::DeletionAndAddition) + } else { + None + } + } + /// Returns only the additional searchable fields. /// If any other searchable field has been modified, returns None. pub fn only_additional_fields(&self) -> Option> { @@ -1599,7 +1625,7 @@ mod tests { // When we search for something that is not in // the searchable fields it must not return any document. let result = index.search(&rtxn).query("23").execute().unwrap(); - assert!(result.documents_ids.is_empty()); + assert_eq!(result.documents_ids, Vec::::new()); // When we search for something that is in the searchable fields // we must find the appropriate document.