5288: Improve AI logging r=dureuill a=Kerollmops

This PR fixes #5285 and brings the changes from #5233 to simplify debugging indexation and search performance issues related to AI. The following texts can be found in the logs to debug and understand performance issues:

 - `embed_one: search` represents the time we spent waiting for the embedding generation, i.e., OpenAI, local HuggingFace, Ollama.
 - `filtered_universe: search::universe` the time spent filtering the documents.
 - ~`next_bucket: search::vector_sort` is the time spent finding the nearest neighbors (ANNs) in the vector store (arroy), locally~ was being triggered too many times.
 - `indexing::vectors` is the time arroy spends indexing the new vectors for a batch.
 - `documents::extract vectors` and `documents::merge vectors` to see the time spent generating and writing the embeddings.

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2025-02-04 10:20:45 +00:00 committed by GitHub
commit 796acd1aee
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 6 additions and 5 deletions

View File

@ -1,5 +1,5 @@
<p align="center"> <p align="center">
<img alt="the milli logo" src="../assets/milli-logo.svg"> <img alt="the milli logo" src="../../assets/milli-logo.svg">
</p> </p>
<p align="center">a concurrent indexer combined with fast and relevant search algorithms</p> <p align="center">a concurrent indexer combined with fast and relevant search algorithms</p>

View File

@ -563,7 +563,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
Ok(()) Ok(())
} }
#[tracing::instrument(level = "trace", skip_all, target = "search::universe")] #[tracing::instrument(level = "debug", skip_all, target = "search::universe")]
pub fn filtered_universe( pub fn filtered_universe(
index: &Index, index: &Index,
txn: &RoTxn<'_>, txn: &RoTxn<'_>,

View File

@ -234,7 +234,7 @@ where
); );
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{ {
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors"); let span = tracing::debug_span!(target: "indexing::documents::extract", "vectors");
let _entered = span.enter(); let _entered = span.enter();
extract( extract(
@ -247,7 +247,7 @@ where
)?; )?;
} }
{ {
let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors"); let span = tracing::debug_span!(target: "indexing::documents::merge", "vectors");
let _entered = span.enter(); let _entered = span.enter();
for config in &mut index_embeddings { for config in &mut index_embeddings {

View File

@ -88,7 +88,7 @@ pub(super) fn write_to_db(
Ok(()) Ok(())
} }
#[tracing::instrument(level = "trace", skip_all, target = "indexing::vectors")] #[tracing::instrument(level = "debug", skip_all, target = "indexing::vectors")]
pub(super) fn build_vectors<MSP>( pub(super) fn build_vectors<MSP>(
index: &Index, index: &Index,
wtxn: &mut RwTxn<'_>, wtxn: &mut RwTxn<'_>,

View File

@ -611,6 +611,7 @@ impl Embedder {
} }
} }
#[tracing::instrument(level = "debug", skip_all, target = "search")]
pub fn embed_one( pub fn embed_one(
&self, &self,
text: String, text: String,