From 2ae545f38c2309a48a4bf508986b254fc1ce13c0 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 25 Jun 2025 11:48:58 +0200 Subject: [PATCH] Replace the legacy Settings::execute by the new one --- crates/benchmarks/benches/indexing.rs | 2 +- crates/benchmarks/benches/utils.rs | 2 +- .../src/scheduler/process_batch.rs | 5 +- .../src/scheduler/process_index_operation.rs | 9 +- crates/meilisearch/src/lib.rs | 5 +- .../milli/src/search/new/tests/integration.rs | 2 +- crates/milli/src/test_index.rs | 2 +- crates/milli/src/update/settings.rs | 115 +++++++++++++++++- crates/milli/tests/search/distinct.rs | 3 +- .../milli/tests/search/facet_distribution.rs | 2 +- crates/milli/tests/search/mod.rs | 2 +- crates/milli/tests/search/phrase_search.rs | 3 +- crates/milli/tests/search/query_criteria.rs | 6 +- crates/milli/tests/search/typo_tolerance.rs | 8 +- 14 files changed, 137 insertions(+), 29 deletions(-) diff --git a/crates/benchmarks/benches/indexing.rs b/crates/benchmarks/benches/indexing.rs index 9199c3877..681f895cb 100644 --- a/crates/benchmarks/benches/indexing.rs +++ b/crates/benchmarks/benches/indexing.rs @@ -65,7 +65,7 @@ fn setup_settings<'t>( let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect(); builder.set_sortable_fields(sortable_fields); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default()).unwrap(); } fn setup_index_with_settings( diff --git a/crates/benchmarks/benches/utils.rs b/crates/benchmarks/benches/utils.rs index aaa2d50a0..601a371e5 100644 --- a/crates/benchmarks/benches/utils.rs +++ b/crates/benchmarks/benches/utils.rs @@ -90,7 +90,7 @@ pub fn base_setup(conf: &Conf) -> Index { (conf.configure)(&mut builder); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default()).unwrap(); wtxn.commit().unwrap(); let config = IndexerConfig::default(); diff --git a/crates/index-scheduler/src/scheduler/process_batch.rs b/crates/index-scheduler/src/scheduler/process_batch.rs index c349f90ad..6c417a886 100644 --- a/crates/index-scheduler/src/scheduler/process_batch.rs +++ b/crates/index-scheduler/src/scheduler/process_batch.rs @@ -239,10 +239,7 @@ impl IndexScheduler { builder.set_primary_key(primary_key); let must_stop_processing = self.scheduler.must_stop_processing.clone(); builder - .execute( - |indexing_step| tracing::debug!(update = ?indexing_step), - || must_stop_processing.get(), - ) + .execute(&|| must_stop_processing.get(), &progress) .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?; index_wtxn.commit()?; } diff --git a/crates/index-scheduler/src/scheduler/process_index_operation.rs b/crates/index-scheduler/src/scheduler/process_index_operation.rs index 093c6209d..0e303dff9 100644 --- a/crates/index-scheduler/src/scheduler/process_index_operation.rs +++ b/crates/index-scheduler/src/scheduler/process_index_operation.rs @@ -468,14 +468,11 @@ impl IndexScheduler { } progress.update_progress(SettingsProgress::ApplyTheSettings); - builder - .execute( - |indexing_step| tracing::debug!(update = ?indexing_step), - || must_stop_processing.get(), - ) + let congestion = builder + .execute(&|| must_stop_processing.get(), progress) .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; - Ok((tasks, None)) + Ok((tasks, congestion)) } IndexOperation::DocumentClearAndSetting { index_uid, diff --git a/crates/meilisearch/src/lib.rs b/crates/meilisearch/src/lib.rs index 1e0c205d0..08274bf2b 100644 --- a/crates/meilisearch/src/lib.rs +++ b/crates/meilisearch/src/lib.rs @@ -37,6 +37,7 @@ use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; use meilisearch_auth::{open_auth_store_env, AuthController}; use meilisearch_types::milli::constants::VERSION_MAJOR; use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; +use meilisearch_types::milli::progress::Progress; use meilisearch_types::milli::update::{ default_thread_pool_and_threads, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, }; @@ -463,6 +464,7 @@ fn import_dump( index_scheduler: &mut IndexScheduler, auth: &mut AuthController, ) -> Result<(), anyhow::Error> { + let progress = Progress::default(); let reader = File::open(dump_path)?; let mut dump_reader = dump::DumpReader::open(reader)?; @@ -542,8 +544,7 @@ fn import_dump( tracing::info!("Importing the settings."); let settings = index_reader.settings()?; apply_settings_to_builder(&settings, &mut builder); - builder - .execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?; + builder.execute(&|| false, &progress)?; // 4.3 Import the documents. // 4.3.1 We need to recreate the grenad+obkv format accepted by the index. diff --git a/crates/milli/src/search/new/tests/integration.rs b/crates/milli/src/search/new/tests/integration.rs index 4a6cc9b90..295103b2f 100644 --- a/crates/milli/src/search/new/tests/integration.rs +++ b/crates/milli/src/search/new/tests/integration.rs @@ -44,7 +44,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { S("america") => vec![S("the united states")], }); builder.set_searchable_fields(vec![S("title"), S("description")]); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default()).unwrap(); wtxn.commit().unwrap(); // index documents diff --git a/crates/milli/src/test_index.rs b/crates/milli/src/test_index.rs index dfd570b96..050b046a8 100644 --- a/crates/milli/src/test_index.rs +++ b/crates/milli/src/test_index.rs @@ -134,7 +134,7 @@ impl TempIndex { ) -> Result<(), crate::error::Error> { let mut builder = update::Settings::new(wtxn, &self.inner, &self.indexer_config); update(&mut builder); - builder.execute(drop, || false)?; + builder.execute(&|| false, &Progress::default())?; Ok(()) } diff --git a/crates/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs index f396cd079..a7ab2dd04 100644 --- a/crates/milli/src/update/settings.rs +++ b/crates/milli/src/update/settings.rs @@ -27,16 +27,20 @@ use crate::index::{ DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS, }; use crate::order_by_map::OrderByMap; +use crate::progress::Progress; use crate::prompt::{default_max_bytes, default_template_text, PromptData}; use crate::proximity::ProximityPrecision; use crate::update::index_documents::IndexDocumentsMethod; +use crate::update::new::indexer::reindex; use crate::update::{IndexDocuments, UpdateIndexingStep}; use crate::vector::settings::{ EmbedderAction, EmbedderSource, EmbeddingSettings, NestingContext, ReindexAction, SubEmbeddingSettings, WriteBackToDocuments, }; use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs}; -use crate::{FieldId, FilterableAttributesRule, Index, LocalizedAttributesRule, Result}; +use crate::{ + ChannelCongestion, FieldId, FilterableAttributesRule, Index, LocalizedAttributesRule, Result, +}; #[derive(Debug, Clone, PartialEq, Eq, Copy)] pub enum Setting { @@ -1355,7 +1359,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { } } - pub fn execute(mut self, progress_callback: FP, should_abort: FA) -> Result<()> + pub fn legacy_execute(mut self, progress_callback: FP, should_abort: FA) -> Result<()> where FP: Fn(UpdateIndexingStep) + Sync, FA: Fn() -> bool + Sync, @@ -1418,6 +1422,106 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { Ok(()) } + + pub fn execute<'indexer, MSP>( + mut self, + must_stop_processing: &'indexer MSP, + progress: &'indexer Progress, + ) -> Result> + where + MSP: Fn() -> bool + Sync, + { + // force the old indexer if the environment says so + if std::env::var_os("MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS").is_some() { + return self + .legacy_execute( + |indexing_step| tracing::debug!(update = ?indexing_step), + must_stop_processing, + ) + .map(|_| None); + } + + // only use the new indexer when only the embedder possibly changed + if let Self { + searchable_fields: Setting::NotSet, + displayed_fields: Setting::NotSet, + filterable_fields: Setting::NotSet, + sortable_fields: Setting::NotSet, + criteria: Setting::NotSet, + stop_words: Setting::NotSet, + non_separator_tokens: Setting::NotSet, + separator_tokens: Setting::NotSet, + dictionary: Setting::NotSet, + distinct_field: Setting::NotSet, + synonyms: Setting::NotSet, + primary_key: Setting::NotSet, + authorize_typos: Setting::NotSet, + min_word_len_two_typos: Setting::NotSet, + min_word_len_one_typo: Setting::NotSet, + exact_words: Setting::NotSet, + exact_attributes: Setting::NotSet, + max_values_per_facet: Setting::NotSet, + sort_facet_values_by: Setting::NotSet, + pagination_max_total_hits: Setting::NotSet, + proximity_precision: Setting::NotSet, + embedder_settings: _, + search_cutoff: Setting::NotSet, + localized_attributes_rules: Setting::NotSet, + prefix_search: Setting::NotSet, + facet_search: Setting::NotSet, + disable_on_numbers: Setting::NotSet, + chat: Setting::NotSet, + wtxn: _, + index: _, + indexer_config: _, // TODO: this is not used + } = &self + { + self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?; + + let old_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?; + + // Update index settings + let embedding_config_updates = self.update_embedding_configs()?; + + let mut new_inner_settings = + InnerIndexSettings::from_index(self.index, self.wtxn, None)?; + new_inner_settings.recompute_searchables(self.wtxn, self.index)?; + + let primary_key_id = self + .index + .primary_key(self.wtxn)? + .and_then(|name| new_inner_settings.fields_ids_map.id(name)); + let settings_update_only = true; + let inner_settings_diff = InnerIndexSettingsDiff::new( + old_inner_settings, + new_inner_settings, + primary_key_id, + embedding_config_updates, + settings_update_only, + ); + + if self.index.number_of_documents(self.wtxn)? > 0 { + reindex( + self.wtxn, + self.index, + &self.indexer_config.thread_pool, + self.indexer_config.grenad_parameters(), + &inner_settings_diff, + must_stop_processing, + progress, + ) + .map(Some) + } else { + Ok(None) + } + } else { + self.legacy_execute( + |indexing_step| tracing::debug!(update = ?indexing_step), + must_stop_processing, + ) + .map(|_| None) + } + } } pub struct InnerIndexSettingsDiff { @@ -1677,6 +1781,7 @@ pub(crate) struct InnerIndexSettings { pub disabled_typos_terms: DisabledTyposTerms, pub proximity_precision: ProximityPrecision, pub embedding_configs: EmbeddingConfigs, + pub embedder_category_id: HashMap, pub geo_fields_ids: Option<(FieldId, FieldId)>, pub prefix_search: PrefixSearch, pub facet_search: bool, @@ -1699,6 +1804,11 @@ impl InnerIndexSettings { Some(embedding_configs) => embedding_configs, None => embedders(index.embedding_configs(rtxn)?)?, }; + let embedder_category_id = index + .embedder_category_id + .iter(rtxn)? + .map(|r| r.map(|(k, v)| (k.to_string(), v))) + .collect::>()?; let prefix_search = index.prefix_search(rtxn)?.unwrap_or_default(); let facet_search = index.facet_search(rtxn)?; let geo_fields_ids = match fields_ids_map.id(RESERVED_GEO_FIELD_NAME) { @@ -1738,6 +1848,7 @@ impl InnerIndexSettings { exact_attributes, proximity_precision, embedding_configs, + embedder_category_id, geo_fields_ids, prefix_search, facet_search, diff --git a/crates/milli/tests/search/distinct.rs b/crates/milli/tests/search/distinct.rs index fc890dfe8..c22755751 100644 --- a/crates/milli/tests/search/distinct.rs +++ b/crates/milli/tests/search/distinct.rs @@ -1,6 +1,7 @@ use std::collections::HashSet; use big_s::S; +use milli::progress::Progress; use milli::update::Settings; use milli::{Criterion, Search, SearchResult, TermsMatchingStrategy}; use Criterion::*; @@ -19,7 +20,7 @@ macro_rules! test_distinct { let config = milli::update::IndexerConfig::default(); let mut builder = Settings::new(&mut wtxn, &index, &config); builder.set_distinct_field(S(stringify!($distinct))); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default()).unwrap(); wtxn.commit().unwrap(); let rtxn = index.read_txn().unwrap(); diff --git a/crates/milli/tests/search/facet_distribution.rs b/crates/milli/tests/search/facet_distribution.rs index 8934cbea4..6b88a79c7 100644 --- a/crates/milli/tests/search/facet_distribution.rs +++ b/crates/milli/tests/search/facet_distribution.rs @@ -25,7 +25,7 @@ fn test_facet_distribution_with_no_facet_values() { FilterableAttributesRule::Field(S("genres")), FilterableAttributesRule::Field(S("tags")), ]); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default()).unwrap(); wtxn.commit().unwrap(); // index documents diff --git a/crates/milli/tests/search/mod.rs b/crates/milli/tests/search/mod.rs index 906956716..c4d5aa8fe 100644 --- a/crates/milli/tests/search/mod.rs +++ b/crates/milli/tests/search/mod.rs @@ -63,7 +63,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { S("america") => vec![S("the united states")], }); builder.set_searchable_fields(vec![S("title"), S("description")]); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default()).unwrap(); wtxn.commit().unwrap(); // index documents diff --git a/crates/milli/tests/search/phrase_search.rs b/crates/milli/tests/search/phrase_search.rs index b7f792bfc..da519c6f6 100644 --- a/crates/milli/tests/search/phrase_search.rs +++ b/crates/milli/tests/search/phrase_search.rs @@ -1,3 +1,4 @@ +use milli::progress::Progress; use milli::update::{IndexerConfig, Settings}; use milli::{Criterion, Index, Search, TermsMatchingStrategy}; @@ -10,7 +11,7 @@ fn set_stop_words(index: &Index, stop_words: &[&str]) { let mut builder = Settings::new(&mut wtxn, index, &config); let stop_words = stop_words.iter().map(|s| s.to_string()).collect(); builder.set_stop_words(stop_words); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default()).unwrap(); wtxn.commit().unwrap(); } diff --git a/crates/milli/tests/search/query_criteria.rs b/crates/milli/tests/search/query_criteria.rs index 1acc89484..a92c205d7 100644 --- a/crates/milli/tests/search/query_criteria.rs +++ b/crates/milli/tests/search/query_criteria.rs @@ -236,7 +236,7 @@ fn criteria_mixup() { let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, &config); builder.set_criteria(criteria.clone()); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default()).unwrap(); wtxn.commit().unwrap(); let rtxn = index.read_txn().unwrap(); @@ -276,7 +276,7 @@ fn criteria_ascdesc() { S("name"), S("age"), }); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default()).unwrap(); wtxn.commit().unwrap(); let mut wtxn = index.write_txn().unwrap(); @@ -358,7 +358,7 @@ fn criteria_ascdesc() { let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, &config); builder.set_criteria(vec![criterion.clone()]); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default()).unwrap(); wtxn.commit().unwrap(); let rtxn = index.read_txn().unwrap(); diff --git a/crates/milli/tests/search/typo_tolerance.rs b/crates/milli/tests/search/typo_tolerance.rs index 3c0717063..3201ecffa 100644 --- a/crates/milli/tests/search/typo_tolerance.rs +++ b/crates/milli/tests/search/typo_tolerance.rs @@ -46,7 +46,7 @@ fn test_typo_tolerance_one_typo() { let config = IndexerConfig::default(); let mut builder = Settings::new(&mut txn, &index, &config); builder.set_min_word_len_one_typo(4); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default()).unwrap(); // typo is now supported for 4 letters words let mut search = Search::new(&txn, &index); @@ -92,7 +92,7 @@ fn test_typo_tolerance_two_typo() { let config = IndexerConfig::default(); let mut builder = Settings::new(&mut txn, &index, &config); builder.set_min_word_len_two_typos(7); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default()).unwrap(); // typo is now supported for 4 letters words let mut search = Search::new(&txn, &index); @@ -180,7 +180,7 @@ fn test_typo_disabled_on_word() { // `zealand` doesn't allow typos anymore exact_words.insert("zealand".to_string()); builder.set_exact_words(exact_words); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default()).unwrap(); let mut search = Search::new(&txn, &index); search.query("zealand"); @@ -218,7 +218,7 @@ fn test_disable_typo_on_attribute() { let mut builder = Settings::new(&mut txn, &index, &config); // disable typos on `description` builder.set_exact_attributes(vec!["description".to_string()].into_iter().collect()); - builder.execute(|_| (), || false).unwrap(); + builder.execute(&|| false, &Progress::default()).unwrap(); let mut search = Search::new(&txn, &index); search.query("antebelum");