5351: Bring back v1.13.0 changes into main r=irevoire a=Kerollmops

This PR brings back the changes made in v1.13 into the main branch.

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Clémentine <clementine@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2025-02-18 08:05:02 +00:00 committed by GitHub
commit 0f1aeb8eaa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
101 changed files with 8351 additions and 1518 deletions

View file

@ -234,7 +234,7 @@ where
);
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
let span = tracing::debug_span!(target: "indexing::documents::extract", "vectors");
let _entered = span.enter();
extract(
@ -247,7 +247,7 @@ where
)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors");
let span = tracing::debug_span!(target: "indexing::documents::merge", "vectors");
let _entered = span.enter();
for config in &mut index_embeddings {

View file

@ -1,5 +1,5 @@
use std::sync::atomic::AtomicBool;
use std::sync::RwLock;
use std::sync::{Once, RwLock};
use std::thread::{self, Builder};
use big_s::S;
@ -33,6 +33,8 @@ mod post_processing;
mod update_by_function;
mod write;
static LOG_MEMORY_METRICS_ONCE: Once = Once::new();
/// This is the main function of this crate.
///
/// Give it the output of the [`Indexer::document_changes`] method and it will execute it in the [`rayon::ThreadPool`].
@ -93,6 +95,15 @@ where
},
);
LOG_MEMORY_METRICS_ONCE.call_once(|| {
tracing::debug!(
"Indexation allocated memory metrics - \
Total BBQueue size: {total_bbbuffer_capacity}, \
Total extractor memory: {:?}",
grenad_parameters.max_memory,
);
});
let (extractor_sender, writer_receiver) = pool
.install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000))
.unwrap();
@ -179,13 +190,16 @@ where
indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase);
build_vectors(
index,
wtxn,
index_embeddings,
&mut arroy_writers,
&indexing_context.must_stop_processing,
)?;
pool.install(|| {
build_vectors(
index,
wtxn,
index_embeddings,
&mut arroy_writers,
&indexing_context.must_stop_processing,
)
})
.unwrap()?;
post_processing::post_process(
indexing_context,

View file

@ -72,11 +72,23 @@ pub(super) fn write_to_db(
&mut aligned_embedding,
)?;
}
write_from_bbqueue(&mut writer_receiver, index, wtxn, arroy_writers, &mut aligned_embedding)?;
let direct_attempts = writer_receiver.sent_messages_attempts();
let blocking_attempts = writer_receiver.blocking_sent_messages_attempts();
let congestion_pct = (blocking_attempts as f64 / direct_attempts as f64) * 100.0;
tracing::debug!(
"Channel congestion metrics - \
Attempts: {direct_attempts}, \
Blocked attempts: {blocking_attempts} \
({congestion_pct:.1}% congestion)"
);
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::vectors")]
#[tracing::instrument(level = "debug", skip_all, target = "indexing::vectors")]
pub(super) fn build_vectors<MSP>(
index: &Index,
wtxn: &mut RwTxn<'_>,