diff --git a/crates/benchmarks/benches/indexing.rs b/crates/benchmarks/benches/indexing.rs index 610fa4a00..16e7a2f81 100644 --- a/crates/benchmarks/benches/indexing.rs +++ b/crates/benchmarks/benches/indexing.rs @@ -65,7 +65,7 @@ fn setup_settings<'t>( let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect(); builder.set_sortable_fields(sortable_fields); - builder.execute(&|| false, &Progress::default()).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); } fn setup_index_with_settings( diff --git a/crates/benchmarks/benches/utils.rs b/crates/benchmarks/benches/utils.rs index 2cacc5477..54bb7e51b 100644 --- a/crates/benchmarks/benches/utils.rs +++ b/crates/benchmarks/benches/utils.rs @@ -90,7 +90,7 @@ pub fn base_setup(conf: &Conf) -> Index { (conf.configure)(&mut builder); - builder.execute(&|| false, &Progress::default()).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); let config = IndexerConfig::default(); diff --git a/crates/index-scheduler/src/scheduler/process_batch.rs b/crates/index-scheduler/src/scheduler/process_batch.rs index 237608648..e6bf6f713 100644 --- a/crates/index-scheduler/src/scheduler/process_batch.rs +++ b/crates/index-scheduler/src/scheduler/process_batch.rs @@ -245,7 +245,11 @@ impl IndexScheduler { let must_stop_processing = self.scheduler.must_stop_processing.clone(); builder - .execute(&|| must_stop_processing.get(), &progress) + .execute( + &|| must_stop_processing.get(), + &progress, + current_batch.embedder_stats.clone(), + ) .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?; index_wtxn.commit()?; } diff --git a/crates/index-scheduler/src/scheduler/process_index_operation.rs b/crates/index-scheduler/src/scheduler/process_index_operation.rs index c302d6983..04aaf9a84 100644 --- a/crates/index-scheduler/src/scheduler/process_index_operation.rs +++ b/crates/index-scheduler/src/scheduler/process_index_operation.rs @@ -475,7 +475,7 @@ impl IndexScheduler { progress.update_progress(SettingsProgress::ApplyTheSettings); let congestion = builder - .execute(&|| must_stop_processing.get(), progress) + .execute(&|| must_stop_processing.get(), progress, embedder_stats) .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; Ok((tasks, congestion)) diff --git a/crates/meilisearch/src/lib.rs b/crates/meilisearch/src/lib.rs index 4bfce17f8..871bd688e 100644 --- a/crates/meilisearch/src/lib.rs +++ b/crates/meilisearch/src/lib.rs @@ -37,7 +37,7 @@ use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; use meilisearch_auth::{open_auth_store_env, AuthController}; use meilisearch_types::milli::constants::VERSION_MAJOR; use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; -use meilisearch_types::milli::progress::Progress; +use meilisearch_types::milli::progress::{EmbedderStats, Progress}; use meilisearch_types::milli::update::{ default_thread_pool_and_threads, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, }; @@ -544,7 +544,8 @@ fn import_dump( tracing::info!("Importing the settings."); let settings = index_reader.settings()?; apply_settings_to_builder(&settings, &mut builder); - builder.execute(&|| false, &progress)?; + let embedder_stats: Arc = Default::default(); + builder.execute(&|| false, &progress, embedder_stats.clone())?; // 4.3 Import the documents. // 4.3.1 We need to recreate the grenad+obkv format accepted by the index. diff --git a/crates/milli/src/search/new/tests/integration.rs b/crates/milli/src/search/new/tests/integration.rs index 700a527ac..9e2afca97 100644 --- a/crates/milli/src/search/new/tests/integration.rs +++ b/crates/milli/src/search/new/tests/integration.rs @@ -44,7 +44,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { S("america") => vec![S("the united states")], }); builder.set_searchable_fields(vec![S("title"), S("description")]); - builder.execute(&|| false, &Progress::default()).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); // index documents diff --git a/crates/milli/src/test_index.rs b/crates/milli/src/test_index.rs index 03bef5838..f2e34c615 100644 --- a/crates/milli/src/test_index.rs +++ b/crates/milli/src/test_index.rs @@ -135,7 +135,7 @@ impl TempIndex { ) -> Result<(), crate::error::Error> { let mut builder = update::Settings::new(wtxn, &self.inner, &self.indexer_config); update(&mut builder); - builder.execute(&|| false, &Progress::default())?; + builder.execute(&|| false, &Progress::default(), Default::default())?; Ok(()) } diff --git a/crates/milli/src/update/new/extract/vectors/mod.rs b/crates/milli/src/update/new/extract/vectors/mod.rs index 252e136fd..edb68b6db 100644 --- a/crates/milli/src/update/new/extract/vectors/mod.rs +++ b/crates/milli/src/update/new/extract/vectors/mod.rs @@ -303,6 +303,7 @@ pub struct SettingsChangeEmbeddingExtractor<'a, 'b> { old_embedders: &'a EmbeddingConfigs, embedder_actions: &'a BTreeMap, embedder_category_id: &'a std::collections::HashMap, + embedder_stats: &'a EmbedderStats, sender: EmbeddingSender<'a, 'b>, possible_embedding_mistakes: PossibleEmbeddingMistakes, threads: &'a ThreadPoolNoAbort, @@ -314,6 +315,7 @@ impl<'a, 'b> SettingsChangeEmbeddingExtractor<'a, 'b> { old_embedders: &'a EmbeddingConfigs, embedder_actions: &'a BTreeMap, embedder_category_id: &'a std::collections::HashMap, + embedder_stats: &'a EmbedderStats, sender: EmbeddingSender<'a, 'b>, field_distribution: &'a FieldDistribution, threads: &'a ThreadPoolNoAbort, @@ -324,6 +326,7 @@ impl<'a, 'b> SettingsChangeEmbeddingExtractor<'a, 'b> { old_embedders, embedder_actions, embedder_category_id, + embedder_stats, sender, threads, possible_embedding_mistakes, @@ -371,6 +374,7 @@ impl<'extractor> SettingsChangeExtractor<'extractor> for SettingsChangeEmbedding prompt, context.data, &self.possible_embedding_mistakes, + self.embedder_stats, self.threads, self.sender, &context.doc_alloc, diff --git a/crates/milli/src/update/new/indexer/extract.rs b/crates/milli/src/update/new/indexer/extract.rs index 8ed9dc37a..2986d5d57 100644 --- a/crates/milli/src/update/new/indexer/extract.rs +++ b/crates/milli/src/update/new/indexer/extract.rs @@ -333,6 +333,7 @@ pub(super) fn extract_all_settings_changes( field_distribution: &mut BTreeMap, mut index_embeddings: Vec, modified_docids: &mut RoaringBitmap, + embedder_stats: &EmbedderStats, ) -> Result> where MSP: Fn() -> bool + Sync, @@ -371,6 +372,7 @@ where settings_delta.old_embedders(), settings_delta.embedder_actions(), settings_delta.new_embedder_category_id(), + embedder_stats, embedding_sender, field_distribution, request_threads(), diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 2398b5f09..7d1ad6df5 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -1,6 +1,6 @@ use std::collections::BTreeMap; use std::sync::atomic::AtomicBool; -use std::sync::{Once, RwLock}; +use std::sync::{Arc, Once, RwLock}; use std::thread::{self, Builder}; use big_s::S; @@ -20,8 +20,8 @@ use super::steps::IndexingStep; use super::thread_local::ThreadLocal; use crate::documents::PrimaryKey; use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; -use crate::update::settings::SettingsDelta; use crate::progress::{EmbedderStats, Progress}; +use crate::update::settings::SettingsDelta; use crate::update::GrenadParameters; use crate::vector::settings::{EmbedderAction, WriteBackToDocuments}; use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs}; @@ -213,6 +213,7 @@ pub fn reindex<'indexer, 'index, MSP, SD>( settings_delta: &'indexer SD, must_stop_processing: &'indexer MSP, progress: &'indexer Progress, + embedder_stats: Arc, ) -> Result where MSP: Fn() -> bool + Sync, @@ -274,6 +275,7 @@ where field_distribution, index_embeddings, modified_docids, + &embedder_stats, ) }) .unwrap() diff --git a/crates/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs index 32e3b17f9..834b85978 100644 --- a/crates/milli/src/update/settings.rs +++ b/crates/milli/src/update/settings.rs @@ -27,8 +27,8 @@ use crate::index::{ DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS, }; use crate::order_by_map::OrderByMap; -use crate::progress::Progress; use crate::progress::EmbedderStats; +use crate::progress::Progress; use crate::prompt::{default_max_bytes, default_template_text, PromptData}; use crate::proximity::ProximityPrecision; use crate::update::index_documents::IndexDocumentsMethod; @@ -1362,7 +1362,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { } } - pub fn legacy_execute(mut self, progress_callback: FP, should_abort: FA) -> Result<()> + pub fn legacy_execute( + mut self, + progress_callback: FP, + should_abort: FA, + embedder_stats: Arc, + ) -> Result<()> where FP: Fn(UpdateIndexingStep) + Sync, FA: Fn() -> bool + Sync, @@ -1430,6 +1435,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { mut self, must_stop_processing: &'indexer MSP, progress: &'indexer Progress, + embedder_stats: Arc, ) -> Result> where MSP: Fn() -> bool + Sync, @@ -1440,6 +1446,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { .legacy_execute( |indexing_step| tracing::debug!(update = ?indexing_step), must_stop_processing, + embedder_stats, ) .map(|_| None); } @@ -1510,6 +1517,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { &inner_settings_diff, must_stop_processing, progress, + embedder_stats, ) .map(Some) } else { @@ -1519,6 +1527,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.legacy_execute( |indexing_step| tracing::debug!(update = ?indexing_step), must_stop_processing, + embedder_stats, ) .map(|_| None) } diff --git a/crates/milli/tests/search/distinct.rs b/crates/milli/tests/search/distinct.rs index c22755751..c7fa9befa 100644 --- a/crates/milli/tests/search/distinct.rs +++ b/crates/milli/tests/search/distinct.rs @@ -20,7 +20,7 @@ macro_rules! test_distinct { let config = milli::update::IndexerConfig::default(); let mut builder = Settings::new(&mut wtxn, &index, &config); builder.set_distinct_field(S(stringify!($distinct))); - builder.execute(&|| false, &Progress::default()).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); let rtxn = index.read_txn().unwrap(); diff --git a/crates/milli/tests/search/facet_distribution.rs b/crates/milli/tests/search/facet_distribution.rs index ff939ec47..d04db425e 100644 --- a/crates/milli/tests/search/facet_distribution.rs +++ b/crates/milli/tests/search/facet_distribution.rs @@ -25,7 +25,7 @@ fn test_facet_distribution_with_no_facet_values() { FilterableAttributesRule::Field(S("genres")), FilterableAttributesRule::Field(S("tags")), ]); - builder.execute(&|| false, &Progress::default()).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); // index documents diff --git a/crates/milli/tests/search/mod.rs b/crates/milli/tests/search/mod.rs index 0515ece66..3ee78561d 100644 --- a/crates/milli/tests/search/mod.rs +++ b/crates/milli/tests/search/mod.rs @@ -63,7 +63,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { S("america") => vec![S("the united states")], }); builder.set_searchable_fields(vec![S("title"), S("description")]); - builder.execute(&|| false, &Progress::default()).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); // index documents diff --git a/crates/milli/tests/search/phrase_search.rs b/crates/milli/tests/search/phrase_search.rs index da519c6f6..397729c20 100644 --- a/crates/milli/tests/search/phrase_search.rs +++ b/crates/milli/tests/search/phrase_search.rs @@ -11,7 +11,7 @@ fn set_stop_words(index: &Index, stop_words: &[&str]) { let mut builder = Settings::new(&mut wtxn, index, &config); let stop_words = stop_words.iter().map(|s| s.to_string()).collect(); builder.set_stop_words(stop_words); - builder.execute(&|| false, &Progress::default()).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); } diff --git a/crates/milli/tests/search/query_criteria.rs b/crates/milli/tests/search/query_criteria.rs index 113c8bc03..cb0c23e42 100644 --- a/crates/milli/tests/search/query_criteria.rs +++ b/crates/milli/tests/search/query_criteria.rs @@ -236,7 +236,7 @@ fn criteria_mixup() { let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, &config); builder.set_criteria(criteria.clone()); - builder.execute(&|| false, &Progress::default()).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); let rtxn = index.read_txn().unwrap(); @@ -276,7 +276,7 @@ fn criteria_ascdesc() { S("name"), S("age"), }); - builder.execute(&|| false, &Progress::default()).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); let mut wtxn = index.write_txn().unwrap(); @@ -359,7 +359,7 @@ fn criteria_ascdesc() { let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, &config); builder.set_criteria(vec![criterion.clone()]); - builder.execute(&|| false, &Progress::default()).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); wtxn.commit().unwrap(); let rtxn = index.read_txn().unwrap(); diff --git a/crates/milli/tests/search/typo_tolerance.rs b/crates/milli/tests/search/typo_tolerance.rs index f8e688215..49c9c7b5d 100644 --- a/crates/milli/tests/search/typo_tolerance.rs +++ b/crates/milli/tests/search/typo_tolerance.rs @@ -46,7 +46,7 @@ fn test_typo_tolerance_one_typo() { let config = IndexerConfig::default(); let mut builder = Settings::new(&mut txn, &index, &config); builder.set_min_word_len_one_typo(4); - builder.execute(&|| false, &Progress::default()).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); // typo is now supported for 4 letters words let mut search = Search::new(&txn, &index); @@ -92,7 +92,7 @@ fn test_typo_tolerance_two_typo() { let config = IndexerConfig::default(); let mut builder = Settings::new(&mut txn, &index, &config); builder.set_min_word_len_two_typos(7); - builder.execute(&|| false, &Progress::default()).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); // typo is now supported for 4 letters words let mut search = Search::new(&txn, &index); @@ -181,7 +181,7 @@ fn test_typo_disabled_on_word() { // `zealand` doesn't allow typos anymore exact_words.insert("zealand".to_string()); builder.set_exact_words(exact_words); - builder.execute(&|| false, &Progress::default()).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); let mut search = Search::new(&txn, &index); search.query("zealand"); @@ -219,7 +219,7 @@ fn test_disable_typo_on_attribute() { let mut builder = Settings::new(&mut txn, &index, &config); // disable typos on `description` builder.set_exact_attributes(vec!["description".to_string()].into_iter().collect()); - builder.execute(&|| false, &Progress::default()).unwrap(); + builder.execute(&|| false, &Progress::default(), Default::default()).unwrap(); let mut search = Search::new(&txn, &index); search.query("antebelum");