mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
Expose the write channel congestion in the batches
This commit is contained in:
parent
3ff1de0a21
commit
05cc8c650c
12 changed files with 138 additions and 92 deletions
|
@ -73,6 +73,7 @@ pub use self::search::{
|
|||
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy,
|
||||
Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
||||
};
|
||||
pub use self::update::ChannelCongestion;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ pub use self::facet::bulk::FacetsUpdateBulk;
|
|||
pub use self::facet::incremental::FacetsUpdateIncrementalInner;
|
||||
pub use self::index_documents::*;
|
||||
pub use self::indexer_config::IndexerConfig;
|
||||
pub use self::new::ChannelCongestion;
|
||||
pub use self::settings::{validate_embedding_settings, Setting, Settings};
|
||||
pub use self::update_step::UpdateIndexingStep;
|
||||
pub use self::word_prefix_docids::WordPrefixDocids;
|
||||
|
|
|
@ -291,7 +291,7 @@ where
|
|||
&indexing_context.must_stop_processing,
|
||||
)?;
|
||||
}
|
||||
indexing_context.progress.update_progress(IndexingStep::WritingToDatabase);
|
||||
indexing_context.progress.update_progress(IndexingStep::TailWritingToDatabase);
|
||||
finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);
|
||||
|
||||
Result::Ok((facet_field_ids_delta, index_embeddings))
|
||||
|
|
|
@ -10,6 +10,7 @@ use hashbrown::HashMap;
|
|||
use heed::RwTxn;
|
||||
pub use partial_dump::PartialDump;
|
||||
pub use update_by_function::UpdateByFunction;
|
||||
pub use write::ChannelCongestion;
|
||||
use write::{build_vectors, update_index, write_to_db};
|
||||
|
||||
use super::channel::*;
|
||||
|
@ -53,7 +54,7 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP>(
|
|||
embedders: EmbeddingConfigs,
|
||||
must_stop_processing: &'indexer MSP,
|
||||
progress: &'indexer Progress,
|
||||
) -> Result<()>
|
||||
) -> Result<ChannelCongestion>
|
||||
where
|
||||
DC: DocumentChanges<'pl>,
|
||||
MSP: Fn() -> bool + Sync,
|
||||
|
@ -130,7 +131,7 @@ where
|
|||
let mut field_distribution = index.field_distribution(wtxn)?;
|
||||
let mut document_ids = index.documents_ids(wtxn)?;
|
||||
|
||||
thread::scope(|s| -> Result<()> {
|
||||
let congestion = thread::scope(|s| -> Result<ChannelCongestion> {
|
||||
let indexer_span = tracing::Span::current();
|
||||
let embedders = &embedders;
|
||||
let finished_extraction = &finished_extraction;
|
||||
|
@ -182,7 +183,8 @@ where
|
|||
|
||||
let mut arroy_writers = arroy_writers?;
|
||||
|
||||
write_to_db(writer_receiver, finished_extraction, index, wtxn, &arroy_writers)?;
|
||||
let congestion =
|
||||
write_to_db(writer_receiver, finished_extraction, index, wtxn, &arroy_writers)?;
|
||||
|
||||
indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors);
|
||||
|
||||
|
@ -210,7 +212,7 @@ where
|
|||
|
||||
indexing_context.progress.update_progress(IndexingStep::Finalizing);
|
||||
|
||||
Ok(()) as Result<_>
|
||||
Ok(congestion) as Result<_>
|
||||
})?;
|
||||
|
||||
// required to into_inner the new_fields_ids_map
|
||||
|
@ -227,5 +229,5 @@ where
|
|||
document_ids,
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
Ok(congestion)
|
||||
}
|
||||
|
|
|
@ -14,13 +14,13 @@ use crate::update::settings::InnerIndexSettings;
|
|||
use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings};
|
||||
use crate::{Error, Index, InternalError, Result};
|
||||
|
||||
pub(super) fn write_to_db(
|
||||
pub fn write_to_db(
|
||||
mut writer_receiver: WriterBbqueueReceiver<'_>,
|
||||
finished_extraction: &AtomicBool,
|
||||
index: &Index,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
arroy_writers: &HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>,
|
||||
) -> Result<()> {
|
||||
) -> Result<ChannelCongestion> {
|
||||
// Used by by the ArroySetVector to copy the embedding into an
|
||||
// aligned memory area, required by arroy to accept a new vector.
|
||||
let mut aligned_embedding = Vec::new();
|
||||
|
@ -75,21 +75,36 @@ pub(super) fn write_to_db(
|
|||
|
||||
write_from_bbqueue(&mut writer_receiver, index, wtxn, arroy_writers, &mut aligned_embedding)?;
|
||||
|
||||
let direct_attempts = writer_receiver.sent_messages_attempts();
|
||||
let blocking_attempts = writer_receiver.blocking_sent_messages_attempts();
|
||||
let congestion_pct = (blocking_attempts as f64 / direct_attempts as f64) * 100.0;
|
||||
tracing::debug!(
|
||||
"Channel congestion metrics - \
|
||||
Attempts: {direct_attempts}, \
|
||||
Blocked attempts: {blocking_attempts} \
|
||||
({congestion_pct:.1}% congestion)"
|
||||
);
|
||||
Ok(ChannelCongestion {
|
||||
attempts: writer_receiver.sent_messages_attempts(),
|
||||
blocking_attempts: writer_receiver.blocking_sent_messages_attempts(),
|
||||
})
|
||||
}
|
||||
|
||||
Ok(())
|
||||
/// Stats exposing the congestion of the write channel.
///
/// Collected by the writer side of the bbqueue: every message send is
/// counted as an attempt, and sends that could not be enqueued on the
/// first try (buffer full) are additionally counted as blocking attempts.
#[derive(Debug, Copy, Clone)]
pub struct ChannelCongestion {
    /// Number of attempts to send a message into the bbqueue buffer.
    pub attempts: usize,
    /// Number of blocking attempts which require a retry.
    pub blocking_attempts: usize,
}

impl ChannelCongestion {
    /// Returns the fraction of send attempts that blocked and had to be
    /// retried, in `[0.0, 1.0]` (multiply by 100 for a percentage).
    ///
    /// NOTE(review): when `attempts == 0` the division yields `NaN`
    /// (f32 division by zero of 0/0) — callers displaying this value
    /// should handle that case.
    pub fn congestion_ratio(&self) -> f32 {
        self.blocking_attempts as f32 / self.attempts as f32
    }
}
|
||||
|
||||
#[tracing::instrument(level = "debug", skip_all, target = "indexing::vectors")]
|
||||
pub(super) fn build_vectors<MSP>(
|
||||
pub fn build_vectors<MSP>(
|
||||
index: &Index,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
index_embeddings: Vec<IndexEmbeddingConfig>,
|
||||
|
@ -113,7 +128,7 @@ where
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub(super) fn update_index(
|
||||
pub fn update_index(
|
||||
index: &Index,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
new_fields_ids_map: FieldIdMapWithMetadata,
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
pub use document_change::{Deletion, DocumentChange, Insertion, Update};
|
||||
pub use indexer::ChannelCongestion;
|
||||
pub use merger::{
|
||||
merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases, FacetFieldIdsDelta,
|
||||
};
|
||||
|
|
|
@ -14,7 +14,7 @@ pub enum IndexingStep {
|
|||
ExtractingWordProximity,
|
||||
ExtractingEmbeddings,
|
||||
WritingGeoPoints,
|
||||
WritingToDatabase,
|
||||
TailWritingToDatabase,
|
||||
WaitingForExtractors,
|
||||
WritingEmbeddingsToDatabase,
|
||||
PostProcessingFacets,
|
||||
|
@ -32,7 +32,7 @@ impl Step for IndexingStep {
|
|||
IndexingStep::ExtractingWordProximity => "extracting word proximity",
|
||||
IndexingStep::ExtractingEmbeddings => "extracting embeddings",
|
||||
IndexingStep::WritingGeoPoints => "writing geo points",
|
||||
IndexingStep::WritingToDatabase => "writing to database",
|
||||
IndexingStep::TailWritingToDatabase => "tail writing to database",
|
||||
IndexingStep::WaitingForExtractors => "waiting for extractors",
|
||||
IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database",
|
||||
IndexingStep::PostProcessingFacets => "post-processing facets",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue