Re-integrate embedder stats

This commit is contained in:
ManyTheFish 2025-06-30 09:46:19 +02:00
parent d35b2d8d33
commit 6db5939f84
17 changed files with 45 additions and 23 deletions

View file

@ -44,7 +44,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
S("america") => vec![S("the united states")],
});
builder.set_searchable_fields(vec![S("title"), S("description")]);
-builder.execute(&|| false, &Progress::default()).unwrap();
+builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
wtxn.commit().unwrap();
// index documents

View file

@ -135,7 +135,7 @@ impl TempIndex {
) -> Result<(), crate::error::Error> {
let mut builder = update::Settings::new(wtxn, &self.inner, &self.indexer_config);
update(&mut builder);
-builder.execute(&|| false, &Progress::default())?;
+builder.execute(&|| false, &Progress::default(), Default::default())?;
Ok(())
}

View file

@ -303,6 +303,7 @@ pub struct SettingsChangeEmbeddingExtractor<'a, 'b> {
old_embedders: &'a EmbeddingConfigs,
embedder_actions: &'a BTreeMap<String, EmbedderAction>,
embedder_category_id: &'a std::collections::HashMap<String, u8>,
embedder_stats: &'a EmbedderStats,
sender: EmbeddingSender<'a, 'b>,
possible_embedding_mistakes: PossibleEmbeddingMistakes,
threads: &'a ThreadPoolNoAbort,
@ -314,6 +315,7 @@ impl<'a, 'b> SettingsChangeEmbeddingExtractor<'a, 'b> {
old_embedders: &'a EmbeddingConfigs,
embedder_actions: &'a BTreeMap<String, EmbedderAction>,
embedder_category_id: &'a std::collections::HashMap<String, u8>,
embedder_stats: &'a EmbedderStats,
sender: EmbeddingSender<'a, 'b>,
field_distribution: &'a FieldDistribution,
threads: &'a ThreadPoolNoAbort,
@ -324,6 +326,7 @@ impl<'a, 'b> SettingsChangeEmbeddingExtractor<'a, 'b> {
old_embedders,
embedder_actions,
embedder_category_id,
embedder_stats,
sender,
threads,
possible_embedding_mistakes,
@ -371,6 +374,7 @@ impl<'extractor> SettingsChangeExtractor<'extractor> for SettingsChangeEmbedding
prompt,
context.data,
&self.possible_embedding_mistakes,
self.embedder_stats,
self.threads,
self.sender,
&context.doc_alloc,

View file

@ -333,6 +333,7 @@ pub(super) fn extract_all_settings_changes<MSP, SD>(
field_distribution: &mut BTreeMap<String, u64>,
mut index_embeddings: Vec<IndexEmbeddingConfig>,
modified_docids: &mut RoaringBitmap,
embedder_stats: &EmbedderStats,
) -> Result<Vec<IndexEmbeddingConfig>>
where
MSP: Fn() -> bool + Sync,
@ -371,6 +372,7 @@ where
settings_delta.old_embedders(),
settings_delta.embedder_actions(),
settings_delta.new_embedder_category_id(),
embedder_stats,
embedding_sender,
field_distribution,
request_threads(),

View file

@ -1,6 +1,6 @@
use std::collections::BTreeMap;
use std::sync::atomic::AtomicBool;
-use std::sync::{Once, RwLock};
+use std::sync::{Arc, Once, RwLock};
use std::thread::{self, Builder};
use big_s::S;
@ -20,8 +20,8 @@ use super::steps::IndexingStep;
use super::thread_local::ThreadLocal;
use crate::documents::PrimaryKey;
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
-use crate::update::settings::SettingsDelta;
use crate::progress::{EmbedderStats, Progress};
+use crate::update::settings::SettingsDelta;
use crate::update::GrenadParameters;
use crate::vector::settings::{EmbedderAction, WriteBackToDocuments};
use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs};
@ -213,6 +213,7 @@ pub fn reindex<'indexer, 'index, MSP, SD>(
settings_delta: &'indexer SD,
must_stop_processing: &'indexer MSP,
progress: &'indexer Progress,
embedder_stats: Arc<EmbedderStats>,
) -> Result<ChannelCongestion>
where
MSP: Fn() -> bool + Sync,
@ -274,6 +275,7 @@ where
field_distribution,
index_embeddings,
modified_docids,
&embedder_stats,
)
})
.unwrap()

View file

@ -27,8 +27,8 @@ use crate::index::{
DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
};
use crate::order_by_map::OrderByMap;
-use crate::progress::Progress;
use crate::progress::EmbedderStats;
+use crate::progress::Progress;
use crate::prompt::{default_max_bytes, default_template_text, PromptData};
use crate::proximity::ProximityPrecision;
use crate::update::index_documents::IndexDocumentsMethod;
@ -1362,7 +1362,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
}
}
-pub fn legacy_execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()>
+pub fn legacy_execute<FP, FA>(
+mut self,
+progress_callback: FP,
+should_abort: FA,
+embedder_stats: Arc<EmbedderStats>,
+) -> Result<()>
where
FP: Fn(UpdateIndexingStep) + Sync,
FA: Fn() -> bool + Sync,
@ -1430,6 +1435,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
mut self,
must_stop_processing: &'indexer MSP,
progress: &'indexer Progress,
embedder_stats: Arc<EmbedderStats>,
) -> Result<Option<ChannelCongestion>>
where
MSP: Fn() -> bool + Sync,
@ -1440,6 +1446,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
.legacy_execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
must_stop_processing,
embedder_stats,
)
.map(|_| None);
}
@ -1510,6 +1517,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
&inner_settings_diff,
must_stop_processing,
progress,
embedder_stats,
)
.map(Some)
} else {
@ -1519,6 +1527,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.legacy_execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
must_stop_processing,
embedder_stats,
)
.map(|_| None)
}