From 02c3d6b26546a9f6f6b4406b3d7d077316d800d9 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 3 Apr 2024 11:19:45 +0200 Subject: [PATCH] finish work --- .../extract/extract_docid_word_positions.rs | 41 ++++++---- .../extract/extract_vector_points.rs | 58 ++++++++------ .../extract/extract_word_docids.rs | 22 ++--- .../extract_word_pair_proximity_docids.rs | 23 +++++- .../src/update/index_documents/extract/mod.rs | 80 ++++++++++--------- milli/src/update/index_documents/mod.rs | 16 +--- milli/src/update/index_documents/transform.rs | 19 +++-- milli/src/update/settings.rs | 39 ++++++--- 8 files changed, 171 insertions(+), 127 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index b1a6bb5a6..6cf7b3167 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -34,6 +34,7 @@ pub fn extract_docid_word_positions( let max_positions_per_attributes = max_positions_per_attributes .map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE)); let max_memory = indexer.max_memory_by_thread(); + let force_reindexing = settings_diff.reindex_searchable(); // initialize destination values. let mut documents_ids = RoaringBitmap::new(); @@ -54,12 +55,15 @@ pub fn extract_docid_word_positions( let mut value_buffer = Vec::new(); // initialize tokenizer. - // TODO: Fix ugly allocation + /// TODO: Fix ugly allocation let old_stop_words = settings_diff.old.stop_words.as_ref(); - let old_separators: Option> = - settings_diff.old.allowed_separators.map(|s| s.iter().map(String::as_str).collect()); + let old_separators: Option> = settings_diff + .old + .allowed_separators + .as_ref() + .map(|s| s.iter().map(String::as_str).collect()); let old_dictionary: Option> = - settings_diff.old.dictionary.map(|s| s.iter().map(String::as_str).collect()); + settings_diff.old.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect()); let mut del_builder = tokenizer_builder( old_stop_words, old_separators.as_deref(), @@ -68,12 +72,15 @@ pub fn extract_docid_word_positions( ); let del_tokenizer = del_builder.build(); - // TODO: Fix ugly allocation + /// TODO: Fix ugly allocation let new_stop_words = settings_diff.new.stop_words.as_ref(); - let new_separators: Option> = - settings_diff.new.allowed_separators.map(|s| s.iter().map(String::as_str).collect()); + let new_separators: Option> = settings_diff + .new + .allowed_separators + .as_ref() + .map(|s| s.iter().map(String::as_str).collect()); let new_dictionary: Option> = - settings_diff.new.dictionary.map(|s| s.iter().map(String::as_str).collect()); + settings_diff.new.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect()); let mut add_builder = tokenizer_builder( new_stop_words, new_separators.as_deref(), @@ -92,10 +99,7 @@ pub fn extract_docid_word_positions( let obkv = KvReader::::new(value); // if the searchable fields didn't change, skip the searchable indexing for this document. - if !searchable_fields_changed( - &KvReader::::new(value), - &settings_diff.new.searchable_fields_ids, - ) { + if !force_reindexing && !searchable_fields_changed(&obkv, settings_diff) { continue; } @@ -180,8 +184,9 @@ pub fn extract_docid_word_positions( /// Check if any searchable fields of a document changed. 
fn searchable_fields_changed( obkv: &KvReader, - searchable_fields: &Option>, + settings_diff: &InnerIndexSettingsDiff, ) -> bool { + let searchable_fields = &settings_diff.new.searchable_fields_ids; for (field_id, field_bytes) in obkv.iter() { if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) { let del_add = KvReaderDelAdd::new(field_bytes); @@ -262,12 +267,14 @@ fn lang_safe_tokens_from_document<'a>( // then we don't rerun the extraction. if !script_language.is_empty() { // build a new temporary tokenizer including the allow list. - // TODO: Fix ugly allocation + /// TODO: Fix ugly allocation let stop_words = settings.stop_words.as_ref(); - let separators: Option> = - settings.allowed_separators.map(|s| s.iter().map(String::as_str).collect()); + let separators: Option> = settings + .allowed_separators + .as_ref() + .map(|s| s.iter().map(String::as_str).collect()); let dictionary: Option> = - settings.dictionary.map(|s| s.iter().map(String::as_str).collect()); + settings.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect()); let mut builder = tokenizer_builder(stop_words, separators.as_deref(), dictionary.as_deref(), None); let tokenizer = builder.build(); diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index 40b32bf9c..fc79a861f 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -17,8 +17,9 @@ use crate::error::UserError; use crate::prompt::Prompt; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::index_documents::helpers::try_split_at; +use crate::update::settings::InnerIndexSettingsDiff; use crate::vector::Embedder; -use crate::{DocumentId, FieldsIdsMap, InternalError, Result, VectorOrArrayOfVectors}; +use crate::{DocumentId, InternalError, Result, VectorOrArrayOfVectors}; /// The length of the elements that are always in the buffer when inserting new values. const TRUNCATE_SIZE: usize = size_of::(); @@ -71,12 +72,15 @@ impl VectorStateDelta { pub fn extract_vector_points( obkv_documents: grenad::Reader, indexer: GrenadParameters, - field_id_map: &FieldsIdsMap, + settings_diff: &InnerIndexSettingsDiff, prompt: &Prompt, embedder_name: &str, ) -> Result { puffin::profile_function!(); + let old_fields_ids_map = &settings_diff.old.fields_ids_map; + let new_fields_ids_map = &settings_diff.new.fields_ids_map; + // (docid, _index) -> KvWriterDelAdd -> Vector let mut manual_vectors_writer = create_writer( indexer.chunk_compression_type, @@ -98,8 +102,6 @@ pub fn extract_vector_points( tempfile::tempfile()?, ); - let vectors_fid = field_id_map.id("_vectors"); - let mut key_buffer = Vec::new(); let mut cursor = obkv_documents.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? { @@ -116,15 +118,29 @@ pub fn extract_vector_points( // lazily get it when needed let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() }; - let vectors_field = vectors_fid - .and_then(|vectors_fid| obkv.get(vectors_fid)) - .map(KvReaderDelAdd::new) - .map(|obkv| to_vector_maps(obkv, document_id)) - .transpose()?; + // the vector field id may have changed + let old_vectors_fid = old_fields_ids_map.id("_vectors"); + // filter the old vector fid if the settings has been changed forcing reindexing. 
+ let old_vectors_fid = old_vectors_fid.filter(|_| !settings_diff.reindex_vectors()); - let (del_map, add_map) = vectors_field.unzip(); - let del_map = del_map.flatten(); - let add_map = add_map.flatten(); + let new_vectors_fid = new_fields_ids_map.id("_vectors"); + let vectors_field = { + let del = old_vectors_fid + .and_then(|vectors_fid| obkv.get(vectors_fid)) + .map(KvReaderDelAdd::new) + .map(|obkv| to_vector_map(obkv, DelAdd::Deletion, &document_id)) + .transpose()? + .flatten(); + let add = new_vectors_fid + .and_then(|vectors_fid| obkv.get(vectors_fid)) + .map(KvReaderDelAdd::new) + .map(|obkv| to_vector_map(obkv, DelAdd::Addition, &document_id)) + .transpose()? + .flatten(); + (del, add) + }; + + let (del_map, add_map) = vectors_field; let del_value = del_map.and_then(|mut map| map.remove(embedder_name)); let add_value = add_map.and_then(|mut map| map.remove(embedder_name)); @@ -155,7 +171,7 @@ pub fn extract_vector_points( VectorStateDelta::NowGenerated(prompt.render( obkv, DelAdd::Addition, - field_id_map, + &new_fields_ids_map, )?) } else { VectorStateDelta::NowRemoved @@ -182,9 +198,10 @@ pub fn extract_vector_points( if document_is_kept { // Don't give up if the old prompt was failing - let old_prompt = - prompt.render(obkv, DelAdd::Deletion, field_id_map).unwrap_or_default(); - let new_prompt = prompt.render(obkv, DelAdd::Addition, field_id_map)?; + let old_prompt = prompt + .render(obkv, DelAdd::Deletion, &old_fields_ids_map) + .unwrap_or_default(); + let new_prompt = prompt.render(obkv, DelAdd::Addition, &new_fields_ids_map)?; if old_prompt != new_prompt { tracing::trace!( "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}" @@ -220,15 +237,6 @@ pub fn extract_vector_points( }) } -fn to_vector_maps( - obkv: KvReaderDelAdd, - document_id: impl Fn() -> Value, -) -> Result<(Option>, Option>)> { - let del = to_vector_map(obkv, DelAdd::Deletion, &document_id)?; - let add = to_vector_map(obkv, DelAdd::Addition, &document_id)?; - Ok((del, add)) -} - fn to_vector_map( obkv: KvReaderDelAdd, side: DelAdd, diff --git a/milli/src/update/index_documents/extract/extract_word_docids.rs b/milli/src/update/index_documents/extract/extract_word_docids.rs index 2b1f02326..2be41bb86 100644 --- a/milli/src/update/index_documents/extract/extract_word_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_docids.rs @@ -121,16 +121,16 @@ pub fn extract_word_docids( let (w, fid) = StrBEU16Codec::bytes_decode(key) .map_err(|_| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; - if let Some(word) = word { - if word.as_str() != w { - docids_into_writers(&word, &deletions, &additions, &mut word_docids_writer); + if let Some(current) = word.as_ref() { + if current != w { + docids_into_writers(¤t, &deletions, &additions, &mut word_docids_writer)?; docids_into_writers( - &word, + ¤t, &exact_deletions, &exact_additions, &mut exact_word_docids_writer, - ); - let word = Some(w.to_string()); + )?; + word = Some(w.to_string()); // clear buffers deletions.clear(); additions.clear(); @@ -138,7 +138,7 @@ pub fn extract_word_docids( exact_additions.clear(); } } else { - let word = Some(w.to_string()); + word = Some(w.to_string()); } // merge all deletions @@ -169,13 +169,13 @@ pub fn extract_word_docids( } if let Some(word) = word { - docids_into_writers(&word, &deletions, &additions, &mut word_docids_writer); + docids_into_writers(&word, &deletions, &additions, &mut word_docids_writer)?; docids_into_writers( &word, &exact_deletions, &exact_additions, &mut 
exact_word_docids_writer, - ); + )?; } Ok(( @@ -253,7 +253,7 @@ where CboRoaringBitmapCodec::bytes_encode(deletions).map_err(|_| { SerializationError::Encoding { db_name: Some(DOCID_WORD_POSITIONS) } })?, - ); + )?; } // additions: if !additions.is_empty() { @@ -262,7 +262,7 @@ where CboRoaringBitmapCodec::bytes_encode(additions).map_err(|_| { SerializationError::Encoding { db_name: Some(DOCID_WORD_POSITIONS) } })?, - ); + )?; } // insert everything in the same writer. diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs index d86d09bc8..e185566ca 100644 --- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs @@ -11,7 +11,7 @@ use super::helpers::{ }; use crate::error::SerializationError; use crate::index::db_name::DOCID_WORD_POSITIONS; -use crate::proximity::{index_proximity, MAX_DISTANCE}; +use crate::proximity::{index_proximity, ProximityPrecision, MAX_DISTANCE}; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::settings::InnerIndexSettingsDiff; use crate::{DocumentId, Result}; @@ -24,9 +24,20 @@ use crate::{DocumentId, Result}; pub fn extract_word_pair_proximity_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, - _settings_diff: &InnerIndexSettingsDiff, + settings_diff: &InnerIndexSettingsDiff, ) -> Result>> { puffin::profile_function!(); + let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord; + let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord; + + // early return if the data shouldn't be deleted nor created. 
+ if !any_deletion && !any_addition { + return tempfile::tempfile() + .map_err(Into::into) + .map(BufReader::new) + .and_then(grenad::Reader::new) + .map_err(Into::into); + } let max_memory = indexer.max_memory_by_thread(); @@ -79,6 +90,10 @@ pub fn extract_word_pair_proximity_docids( let (del, add): (Result<_>, Result<_>) = rayon::join( || { + if !any_deletion { + return Ok(()); + } + // deletions if let Some(deletion) = KvReaderDelAdd::new(value).get(DelAdd::Deletion) { for (position, word) in KvReaderU16::new(deletion).iter() { @@ -108,6 +123,10 @@ pub fn extract_word_pair_proximity_docids( Ok(()) }, || { + if !any_addition { + return Ok(()); + } + // additions if let Some(addition) = KvReaderDelAdd::new(value).get(DelAdd::Addition) { for (position, word) in KvReaderU16::new(addition).iter() { diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index a6b73efde..924561dea 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -9,7 +9,6 @@ mod extract_word_docids; mod extract_word_pair_proximity_docids; mod extract_word_position_docids; -use std::collections::HashSet; use std::fs::File; use std::io::BufReader; @@ -30,7 +29,6 @@ use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids use self::extract_word_position_docids::extract_word_position_docids; use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters}; use super::{helpers, TypedChunk}; -use crate::proximity::ProximityPrecision; use crate::update::settings::InnerIndexSettingsDiff; use crate::{FieldId, Result}; @@ -200,12 +198,14 @@ fn run_extraction_task( M: Send, { let current_span = tracing::Span::current(); + /// TODO: remove clone + let settings_diff = settings_diff.clone(); rayon::spawn(move || { let child_span = tracing::trace_span!(target: "indexing::extract::details", parent: ¤t_span, "extract_multiple_chunks"); let _entered = child_span.enter(); puffin::profile_scope!("extract_multiple_chunks", name); - match extract_fn(chunk, indexer, settings_diff) { + match extract_fn(chunk, indexer, &settings_diff) { Ok(chunk) => { let _ = lmdb_writer_sx.send(Ok(serialize_fn(chunk))); } @@ -235,50 +235,54 @@ fn send_original_documents_data( .thread_name(|index| format!("embedding-request-{index}")) .build()?; - rayon::spawn(move || { - for (name, (embedder, prompt)) in embedders { - let result = extract_vector_points( - documents_chunk_cloned.clone(), - indexer, - &field_id_map, - &prompt, - &name, - ); - match result { - Ok(ExtractedVectorPoints { manual_vectors, remove_vectors, prompts }) => { - let embeddings = match extract_embeddings( + if settings_diff.reindex_vectors() || !settings_diff.settings_update_only() { + /// TODO: remove clone + let settings_diff = settings_diff.clone(); + rayon::spawn(move || { + for (name, (embedder, prompt)) in settings_diff.new.embedding_configs.clone() { + let result = extract_vector_points( + documents_chunk_cloned.clone(), + indexer, + &settings_diff, + &prompt, + &name, + ); + match result { + Ok(ExtractedVectorPoints { manual_vectors, remove_vectors, prompts }) => { + let embeddings = match extract_embeddings( prompts, indexer, embedder.clone(), &request_threads, ) { - Ok(results) => Some(results), - Err(error) => { - let _ = lmdb_writer_sx_cloned.send(Err(error)); - None - } - }; + Ok(results) => Some(results), + Err(error) => { + let _ = lmdb_writer_sx_cloned.send(Err(error)); + None + } + }; - if !(remove_vectors.is_empty() - 
&& manual_vectors.is_empty() - && embeddings.as_ref().map_or(true, |e| e.is_empty())) - { - let _ = lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints { - remove_vectors, - embeddings, - expected_dimension: embedder.dimensions(), - manual_vectors, - embedder_name: name, - })); + if !(remove_vectors.is_empty() + && manual_vectors.is_empty() + && embeddings.as_ref().map_or(true, |e| e.is_empty())) + { + let _ = lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints { + remove_vectors, + embeddings, + expected_dimension: embedder.dimensions(), + manual_vectors, + embedder_name: name, + })); + } + } + + Err(error) => { + let _ = lmdb_writer_sx_cloned.send(Err(error)); } } - - Err(error) => { - let _ = lmdb_writer_sx_cloned.send(Err(error)); - } } - } - }); + }); + } // TODO: create a custom internal error let _ = lmdb_writer_sx.send(Ok(TypedChunk::Documents(original_documents_chunk))); diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 6bc5b6ff9..c3b081c37 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -6,7 +6,6 @@ mod typed_chunk; use std::collections::{HashMap, HashSet}; use std::io::{Read, Seek}; -use std::iter::FromIterator; use std::num::NonZeroU32; use std::result::Result as StdResult; @@ -281,7 +280,7 @@ where let TransformOutput { primary_key, - settings_diff, + mut settings_diff, field_distribution, documents_count, original_documents, @@ -319,13 +318,8 @@ where ) = crossbeam_channel::unbounded(); // get the primary key field id - let primary_key_id = output.settings_diff.new.fields_ids_map.id(&primary_key).unwrap(); + let primary_key_id = settings_diff.new.fields_ids_map.id(&primary_key).unwrap(); - // get searchable fields for word databases - let searchable_fields = - self.index.searchable_fields_ids(self.wtxn)?.map(HashSet::from_iter); - // get filterable fields for facet databases - let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; // get the fid of the `_geo.lat` and `_geo.lng` fields. 
let mut field_id_map = self.index.fields_ids_map(self.wtxn)?; @@ -348,12 +342,6 @@ where None => None, }; - let stop_words = self.index.stop_words(self.wtxn)?; - let separators = self.index.allowed_separators(self.wtxn)?; - let dictionary = self.index.dictionary(self.wtxn)?; - let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?; - let proximity_precision = self.index.proximity_precision(self.wtxn)?.unwrap_or_default(); - let pool_params = GrenadParameters { chunk_compression_type: self.indexer_config.chunk_compression_type, chunk_compression_level: self.indexer_config.chunk_compression_level, diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 003353793..e82600683 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -1,12 +1,11 @@ use std::borrow::Cow; use std::collections::btree_map::Entry as BEntry; use std::collections::hash_map::Entry as HEntry; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::fs::File; use std::io::{Read, Seek}; use fxhash::FxHashMap; -use heed::RoTxn; use itertools::Itertools; use obkv::{KvReader, KvReaderU16, KvWriter}; use roaring::RoaringBitmap; @@ -814,7 +813,8 @@ impl<'a, 'i> Transform<'a, 'i> { let settings_diff = InnerIndexSettingsDiff { old: old_inner_settings, new: new_inner_settings, - embedding_configs_updated: true, + embedding_configs_updated: false, + settings_update_only: false, }; Ok(TransformOutput { @@ -844,13 +844,16 @@ impl<'a, 'i> Transform<'a, 'i> { obkv_writer.insert(id, val)?; } } - let new_obkv = KvReader::::new(&obkv_writer.into_inner()?); + let data = obkv_writer.into_inner()?; + let new_obkv = KvReader::::new(&data); // take the non-flattened version if flatten_from_fields_ids_map returns None. - let old_flattened = Self::flatten_from_fields_ids_map(&old_obkv, &mut old_fields_ids_map)? - .map_or_else(|| old_obkv, |bytes| KvReader::::new(&bytes)); - let new_flattened = Self::flatten_from_fields_ids_map(&new_obkv, &mut new_fields_ids_map)? - .map_or_else(|| new_obkv, |bytes| KvReader::::new(&bytes)); + let old_flattened = Self::flatten_from_fields_ids_map(&old_obkv, &mut old_fields_ids_map)?; + let old_flattened = + old_flattened.as_deref().map_or_else(|| old_obkv, KvReader::::new); + let new_flattened = Self::flatten_from_fields_ids_map(&new_obkv, &mut new_fields_ids_map)?; + let new_flattened = + new_flattened.as_deref().map_or_else(|| new_obkv, KvReader::::new); original_obkv_buffer.clear(); flattened_obkv_buffer.clear(); diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index ae4304fce..6c770c0a1 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1010,6 +1010,13 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { } Setting::NotSet => false, }; + + // if any changes force a reindexing + // clear the vector database. 
+ if update { + self.index.vector_arroy.clear(self.wtxn)?; + } + Ok(update) } @@ -1077,6 +1084,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { old: old_inner_settings, new: new_inner_settings, embedding_configs_updated, + settings_update_only: true, }; if inner_settings_diff.any_reindexing_needed() { @@ -1087,20 +1095,23 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { } } -pub(crate) struct InnerIndexSettingsDiff { - pub old: InnerIndexSettings, - pub new: InnerIndexSettings, +#[derive(Clone)] +pub struct InnerIndexSettingsDiff { + pub(crate) old: InnerIndexSettings, + pub(crate) new: InnerIndexSettings, // TODO: compare directly the embedders. - pub embedding_configs_updated: bool, + pub(crate) embedding_configs_updated: bool, + + pub(crate) settings_update_only: bool, } impl InnerIndexSettingsDiff { - fn any_reindexing_needed(&self) -> bool { + pub fn any_reindexing_needed(&self) -> bool { self.reindex_searchable() || self.reindex_facets() || self.reindex_vectors() } - fn reindex_searchable(&self) -> bool { + pub fn reindex_searchable(&self) -> bool { self.old .fields_ids_map .iter() @@ -1115,13 +1126,13 @@ impl InnerIndexSettingsDiff { || self.old.proximity_precision != self.new.proximity_precision } - fn reindex_facets(&self) -> bool { - let existing_fields = self.new.existing_fields; + pub fn reindex_facets(&self) -> bool { + let existing_fields = &self.new.existing_fields; if existing_fields.iter().any(|field| field.contains('.')) { return true; } - let old_faceted_fields = self.old.user_defined_faceted_fields; + let old_faceted_fields = &self.old.user_defined_faceted_fields; if old_faceted_fields.iter().any(|field| field.contains('.')) { return true; } @@ -1129,13 +1140,13 @@ impl InnerIndexSettingsDiff { // If there is new faceted fields we indicate that we must reindex as we must // index new fields as facets. It means that the distinct attribute, // an Asc/Desc criterion or a filtered attribute as be added or removed. - let new_faceted_fields = self.new.user_defined_faceted_fields; + let new_faceted_fields = &self.new.user_defined_faceted_fields; if new_faceted_fields.iter().any(|field| field.contains('.')) { return true; } let faceted_updated = - (&existing_fields - &old_faceted_fields) != (&existing_fields - &new_faceted_fields); + (existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields); self.old .fields_ids_map @@ -1145,9 +1156,13 @@ impl InnerIndexSettingsDiff { || faceted_updated } - fn reindex_vectors(&self) -> bool { + pub fn reindex_vectors(&self) -> bool { self.embedding_configs_updated } + + pub fn settings_update_only(&self) -> bool { + self.settings_update_only + } } #[derive(Clone)]
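
Note (not part of the patch): the core mechanism this commit finishes is that every extractor now receives an `InnerIndexSettingsDiff` (old vs. new `InnerIndexSettings`) and uses it to decide how much work to redo: `reindex_searchable()` gates the word databases, `reindex_vectors()` gates the embedding/arroy work, and `extract_word_pair_proximity_docids` returns an empty reader when neither the old nor the new `proximity_precision` is `ByWord`. The sketch below is a minimal, self-contained illustration of that gating; every type and field name in it (`SettingsDiff`, `stop_words_etag`, `embedder_fingerprint`, ...) is a simplified stand-in, not the actual milli API.

// Standalone sketch of the "settings diff gates reindexing" idea, under the
// assumptions stated above. It is not the real milli code.

#[derive(Clone, PartialEq)]
enum ProximityPrecision {
    ByWord,
    ByAttribute,
}

#[derive(Clone)]
struct Settings {
    // Stand-in for stop words / separators / dictionary state.
    stop_words_etag: u64,
    // Stand-in for the user-defined searchable attributes.
    searchable_fields: Vec<String>,
    proximity_precision: ProximityPrecision,
    // Stand-in for the embedder configurations.
    embedder_fingerprint: u64,
}

struct SettingsDiff {
    old: Settings,
    new: Settings,
    // True when the indexing run comes from a settings update rather than a document update.
    settings_update_only: bool,
}

impl SettingsDiff {
    fn reindex_searchable(&self) -> bool {
        self.old.stop_words_etag != self.new.stop_words_etag
            || self.old.searchable_fields != self.new.searchable_fields
            || self.old.proximity_precision != self.new.proximity_precision
    }

    fn reindex_vectors(&self) -> bool {
        self.old.embedder_fingerprint != self.new.embedder_fingerprint
    }

    fn any_reindexing_needed(&self) -> bool {
        self.reindex_searchable() || self.reindex_vectors()
    }
}

// Each extractor consults the diff and returns early when there is nothing to
// delete nor to create, mirroring the early return added to
// `extract_word_pair_proximity_docids` in this patch.
fn extract_word_pair_proximity(diff: &SettingsDiff) {
    let any_deletion = diff.old.proximity_precision == ProximityPrecision::ByWord;
    let any_addition = diff.new.proximity_precision == ProximityPrecision::ByWord;
    if !any_deletion && !any_addition {
        println!("proximity: nothing to delete nor to create, skipping");
        return;
    }
    println!("proximity: extracting (del: {any_deletion}, add: {any_addition})");
}

fn main() {
    let old = Settings {
        stop_words_etag: 1,
        searchable_fields: vec!["title".into()],
        proximity_precision: ProximityPrecision::ByAttribute,
        embedder_fingerprint: 7,
    };
    // Only the proximity precision changes: searchable data must be re-extracted,
    // but the vector side can be left untouched.
    let new = Settings { proximity_precision: ProximityPrecision::ByWord, ..old.clone() };
    let diff = SettingsDiff { old, new, settings_update_only: true };

    assert!(diff.settings_update_only);
    assert!(diff.any_reindexing_needed());
    assert!(!diff.reindex_vectors());
    extract_word_pair_proximity(&diff);
}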