From 5d7061682ef7eff9b42a5d1b24ee712cb6f92d01 Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Tue, 23 Jan 2024 09:42:48 +0100
Subject: [PATCH] Add tracing to milli

---
 Cargo.lock                                    |  1 +
 milli/Cargo.toml                              | 11 +----
 milli/src/documents/reader.rs                 |  1 +
 milli/src/update/clear_documents.rs           |  6 +++
 milli/src/update/index_documents/enrich.rs    |  3 ++
 .../extract/extract_docid_word_positions.rs   |  2 +-
 .../extract/extract_facet_number_docids.rs    |  2 +-
 .../extract/extract_facet_string_docids.rs    |  2 +-
 .../extract/extract_fid_docid_facet_values.rs |  2 +-
 .../extract/extract_fid_word_count_docids.rs  |  2 +-
 .../extract/extract_geo_points.rs             |  2 +-
 .../extract/extract_vector_points.rs          |  2 +-
 .../extract/extract_word_docids.rs            |  3 +-
 .../extract_word_pair_proximity_docids.rs     | 14 ++++++-
 .../extract/extract_word_position_docids.rs   |  3 +-
 .../src/update/index_documents/extract/mod.rs |  9 ++++
 .../index_documents/helpers/grenad_helpers.rs |  2 +
 milli/src/update/index_documents/mod.rs       | 36 ++++++++++++++--
 milli/src/update/index_documents/transform.rs |  9 ++--
 .../src/update/index_documents/typed_chunk.rs | 41 +++++++++++++++++++
 milli/src/update/settings.rs                  |  5 +++
 milli/src/update/word_prefix_docids.rs        |  7 +++-
 .../src/update/words_prefix_integer_docids.rs |  7 +++-
 milli/src/update/words_prefixes_fst.rs        |  7 +++-
 24 files changed, 150 insertions(+), 29 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index ebb8c3072..cf28f560b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3840,6 +3840,7 @@ dependencies = [
  "time",
  "tokenizers",
  "tokio",
+ "tracing",
  "uuid",
 ]
 
diff --git a/milli/Cargo.toml b/milli/Cargo.toml
index dc2b992e0..10ad2fb8f 100644
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -91,6 +91,7 @@ tiktoken-rs = "0.5.8"
 liquid = "0.26.4"
 arroy = "0.2.0"
 rand = "0.8.5"
+tracing = "0.1.40"
 
 [dev-dependencies]
 mimalloc = { version = "0.1.39", default-features = false }
@@ -102,15 +103,7 @@ meili-snap = { path = "../meili-snap" }
 rand = { version = "0.8.5", features = ["small_rng"] }
 
 [features]
-all-tokenizations = [
-    "charabia/chinese",
-    "charabia/hebrew",
-    "charabia/japanese",
-    "charabia/thai",
-    "charabia/korean",
-    "charabia/greek",
-    "charabia/khmer",
-]
+all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
 
 # Use POSIX semaphores instead of SysV semaphores in LMDB
 # For more information on this feature, see heed's Cargo.toml
diff --git a/milli/src/documents/reader.rs b/milli/src/documents/reader.rs
index a8a4c662d..d5eda69d4 100644
--- a/milli/src/documents/reader.rs
+++ b/milli/src/documents/reader.rs
@@ -25,6 +25,7 @@ impl<R: io::Read + io::Seek> DocumentsBatchReader<R> {
     ///
     /// It first retrieves the index, then moves to the first document. Use the `into_cursor`
     /// method to iterator over the documents, from the first to the last.
+    #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
     pub fn from_reader(reader: R) -> Result<Self, Error> {
         let reader = grenad::Reader::new(reader)?;
         let mut cursor = reader.into_cursor()?;
diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs
index a6c7ff2b1..6715939dc 100644
--- a/milli/src/update/clear_documents.rs
+++ b/milli/src/update/clear_documents.rs
@@ -14,6 +14,12 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
         ClearDocuments { wtxn, index }
     }
 
+    #[tracing::instrument(
+        level = "trace",
+        skip(self),
+        target = "indexing::documents",
+        name = "clear_documents"
+    )]
     pub fn execute(self) -> Result<u64> {
         puffin::profile_function!();
 
diff --git a/milli/src/update/index_documents/enrich.rs b/milli/src/update/index_documents/enrich.rs
index 03eb3f4de..82ea335ae 100644
--- a/milli/src/update/index_documents/enrich.rs
+++ b/milli/src/update/index_documents/enrich.rs
@@ -22,6 +22,7 @@ use crate::{FieldId, Index, Result};
 /// # Panics
 ///
 /// - if reader.is_empty(), this function may panic in some cases
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
 pub fn enrich_documents_batch<R: Read + Seek>(
     rtxn: &heed::RoTxn,
     index: &Index,
@@ -143,6 +144,8 @@ pub fn enrich_documents_batch<R: Read + Seek>(
 
 /// Retrieve the document id after validating it, returning a `UserError`
 /// if the id is invalid or can't be guessed.
+#[tracing::instrument(level = "trace", skip(uuid_buffer, documents_batch_index, document),
+target = "indexing::documents")]
 fn fetch_or_generate_document_id(
     document: &obkv::KvReader<FieldId>,
     documents_batch_index: &DocumentsBatchIndex,
diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
index a6bbf939a..d568154b2 100644
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -21,7 +21,7 @@ pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>;
 ///
 /// Returns the generated internal documents ids and a grenad reader
 /// with the list of extracted words from the given chunk of documents.
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
 pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
     obkv_documents: grenad::Reader<R>,
     indexer: GrenadParameters,
diff --git a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs
index f860aacba..33def5abd 100644
--- a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs
@@ -16,7 +16,7 @@ use crate::Result;
 ///
 /// Returns a grenad reader with the list of extracted facet numbers and
 /// documents ids from the given chunk of docid facet number positions.
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
 pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
     fid_docid_facet_number: grenad::Reader<R>,
     indexer: GrenadParameters,
diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
index 2ade776c3..d14be7464 100644
--- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
@@ -15,7 +15,7 @@ use crate::{FieldId, Result};
 ///
 /// Returns a grenad reader with the list of extracted facet strings and
 /// documents ids from the given chunk of docid facet string positions.
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
 pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
     docid_fid_facet_string: grenad::Reader<R>,
     indexer: GrenadParameters,
diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
index b7de1e621..2449e01cd 100644
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@@ -39,7 +39,7 @@ pub struct ExtractedFacetValues {
 /// Returns the generated grenad reader containing the docid the fid and the orginal value as key
 /// and the normalized value as value extracted from the given chunk of documents.
 /// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
 pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
     obkv_documents: grenad::Reader<R>,
     indexer: GrenadParameters,
diff --git a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
index 182d0c5d8..305af3630 100644
--- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
@@ -19,7 +19,7 @@ const MAX_COUNTED_WORDS: usize = 30;
 ///
 /// Returns a grenad reader with the list of extracted field id word counts
 /// and documents ids from the given chunk of docid word positions.
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
 pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
     docid_word_positions: grenad::Reader<R>,
     indexer: GrenadParameters,
diff --git a/milli/src/update/index_documents/extract/extract_geo_points.rs b/milli/src/update/index_documents/extract/extract_geo_points.rs
index b3600e3bc..cfcc021c6 100644
--- a/milli/src/update/index_documents/extract/extract_geo_points.rs
+++ b/milli/src/update/index_documents/extract/extract_geo_points.rs
@@ -13,7 +13,7 @@ use crate::{FieldId, InternalError, Result};
 /// Extracts the geographical coordinates contained in each document under the `_geo` field.
 ///
 /// Returns the generated grenad reader containing the docid as key associated to the (latitude, longitude)
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
 pub fn extract_geo_points<R: io::Read + io::Seek>(
     obkv_documents: grenad::Reader<R>,
     indexer: GrenadParameters,
diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs
index 87181edc2..0bf7333e3 100644
--- a/milli/src/update/index_documents/extract/extract_vector_points.rs
+++ b/milli/src/update/index_documents/extract/extract_vector_points.rs
@@ -67,7 +67,7 @@ impl VectorStateDelta {
 /// Extracts the embedding vector contained in each document under the `_vectors` field.
 ///
 /// Returns the generated grenad reader containing the docid as key associated to the Vec<f32>
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
 pub fn extract_vector_points<R: io::Read + io::Seek>(
     obkv_documents: grenad::Reader<R>,
     indexer: GrenadParameters,
diff --git a/milli/src/update/index_documents/extract/extract_word_docids.rs b/milli/src/update/index_documents/extract/extract_word_docids.rs
index 66092821f..f38701dac 100644
--- a/milli/src/update/index_documents/extract/extract_word_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_docids.rs
@@ -23,7 +23,7 @@ use crate::{DocumentId, FieldId, Result};
 ///
 /// The first returned reader is the one for normal word_docids, and the second one is for
 /// exact_word_docids
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
 pub fn extract_word_docids<R: io::Read + io::Seek>(
     docid_word_positions: grenad::Reader<R>,
     indexer: GrenadParameters,
@@ -135,6 +135,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
     ))
 }
 
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
 fn words_into_sorter(
     document_id: DocumentId,
     fid: FieldId,
diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
index d364b3e13..82a94ce00 100644
--- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
@@ -19,7 +19,7 @@ use crate::{DocumentId, Result};
 ///
 /// Returns a grenad reader with the list of extracted word pairs proximities and
 /// documents ids from the given chunk of docid word positions.
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
 pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
     docid_word_positions: grenad::Reader<R>,
     indexer: GrenadParameters,
@@ -59,6 +59,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
         if current_document_id.map_or(false, |id| id != document_id) {
             puffin::profile_scope!("Document into sorter");
 
+            // FIXME: span inside of a hot loop might degrade performance and create big reports
+            let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter");
+            let _entered = span.enter();
+
             document_word_positions_into_sorter(
                 current_document_id.unwrap(),
                 &del_word_pair_proximity,
@@ -138,6 +142,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
     if let Some(document_id) = current_document_id {
         puffin::profile_scope!("Final document into sorter");
 
+        // FIXME: span inside of a hot loop might degrade performance and create big reports
+        let span = tracing::trace_span!(target: "indexing::details", "final_document_into_sorter");
+        let _entered = span.enter();
+
         document_word_positions_into_sorter(
             document_id,
             &del_word_pair_proximity,
@@ -147,6 +155,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
     }
     {
         puffin::profile_scope!("sorter_into_reader");
 
+        // FIXME: span inside of a hot loop might degrade performance and create big reports
+        let span = tracing::trace_span!(target: "indexing::details", "sorter_into_reader");
+        let _entered = span.enter();
+
         let mut writer = create_writer(
             indexer.chunk_compression_type,
             indexer.chunk_compression_level,
diff --git a/milli/src/update/index_documents/extract/extract_word_position_docids.rs b/milli/src/update/index_documents/extract/extract_word_position_docids.rs
index 89b77d140..4bc553d9a 100644
--- a/milli/src/update/index_documents/extract/extract_word_position_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_position_docids.rs
@@ -18,7 +18,7 @@ use crate::{bucketed_position, DocumentId, Result};
 ///
 /// Returns a grenad reader with the list of extracted words at positions and
 /// documents ids from the given chunk of docid word positions.
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
 pub fn extract_word_position_docids<R: io::Read + io::Seek>(
     docid_word_positions: grenad::Reader<R>,
     indexer: GrenadParameters,
@@ -94,6 +94,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
     Ok(word_position_docids_reader)
 }
 
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
 fn words_position_into_sorter(
     document_id: DocumentId,
     key_buffer: &mut Vec<u8>,
diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs
index 43e9a36ec..f0fd97965 100644
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -41,6 +41,7 @@ use crate::{FieldId, FieldsIdsMap, Result};
 /// Extract data for each databases from obkv documents in parallel.
 /// Send data in grenad file over provided Sender.
 #[allow(clippy::too_many_arguments)]
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
 pub(crate) fn data_from_obkv_documents(
     original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
     flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
@@ -257,12 +258,20 @@ fn spawn_extraction_task<FE, FS, M>(
     M: MergeableReader + FromParallelIterator<M::Output> + Send + 'static,
     M::Output: Send,
 {
+    let current_span = tracing::Span::current();
+
     rayon::spawn(move || {
+        let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "extract_multiple_chunks");
+        let _entered = child_span.enter();
         puffin::profile_scope!("extract_multiple_chunks", name);
         let chunks: Result<M> =
             chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect();
+        let current_span = tracing::Span::current();
+
         rayon::spawn(move || match chunks {
             Ok(chunks) => {
+                let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "merge_multiple_chunks");
+                let _entered = child_span.enter();
                 debug!("merge {} database", name);
                 puffin::profile_scope!("merge_multiple_chunks", name);
                 let reader = chunks.merge(merge_fn, &indexer);
diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/milli/src/update/index_documents/helpers/grenad_helpers.rs
index e1b27baa2..e30bd0507 100644
--- a/milli/src/update/index_documents/helpers/grenad_helpers.rs
+++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs
@@ -49,6 +49,7 @@ pub fn create_sorter(
     builder.build()
 }
 
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")]
 pub fn sorter_into_reader(
     sorter: grenad::Sorter<MergeFn>,
     indexer: GrenadParameters,
@@ -240,6 +241,7 @@ pub fn grenad_obkv_into_chunks(
 
 /// Write provided sorter in database using serialize_value function.
 /// merge_values function is used if an entry already exist in the database.
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")]
 pub fn write_sorter_into_database<K, V, FS, FM>(
     sorter: Sorter<MergeFn>,
     database: &heed::Database<K, V>,
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index 83d2c08e1..cbcde19fc 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -134,6 +134,7 @@ where
     /// return an error and not the `IndexDocuments` struct as it is invalid to use it afterward.
     ///
     /// Returns the number of documents added to the builder.
+    #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
     pub fn add_documents<R: Read + Seek>(
         mut self,
         reader: DocumentsBatchReader<R>,
@@ -179,6 +180,7 @@ where
     /// Remove a batch of documents from the current builder.
     ///
     /// Returns the number of documents deleted from the builder.
+    #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
     pub fn remove_documents(
         mut self,
         to_delete: Vec<String>,
@@ -214,6 +216,7 @@ where
     /// - No batching using the standards `remove_documents` and `add_documents` took place
     ///
     /// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function.
+    #[tracing::instrument(level = "trace", skip_all, target = "indexing::details")]
    pub fn remove_documents_from_db_no_batch(
        mut self,
        to_delete: &RoaringBitmap,
@@ -237,7 +240,12 @@ where
         Ok((self, deleted_documents))
     }
 
-    #[logging_timer::time("IndexDocuments::{}")]
+    #[tracing::instrument(
+        level = "trace",
+        skip_all,
+        target = "indexing::documents",
+        name = "index_documents"
+    )]
     pub fn execute(mut self) -> Result<DocumentAdditionResult> {
         puffin::profile_function!();
 
@@ -273,7 +281,12 @@ where
     }
 
     /// Returns the total number of documents in the index after the update.
-    #[logging_timer::time("IndexDocuments::{}")]
+    #[tracing::instrument(
+        level = "trace",
+        skip_all,
+        target = "indexing::details",
+        name = "index_documents_raw"
+    )]
     pub fn execute_raw(self, output: TransformOutput) -> Result<u64>
     where
         FP: Fn(UpdateIndexingStep) + Sync,
@@ -374,8 +387,12 @@ where
 
         let cloned_embedder = self.embedders.clone();
 
+        let current_span = tracing::Span::current();
+
         // Run extraction pipeline in parallel.
         pool.install(|| {
+            let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "extract_and_send_grenad_chunks");
+            let _enter = child_span.enter();
             puffin::profile_scope!("extract_and_send_grenad_chunks");
             // split obkv file into several chunks
             let original_chunk_iter =
@@ -543,7 +560,12 @@ where
         Ok(number_of_documents)
     }
 
-    #[logging_timer::time("IndexDocuments::{}")]
+    #[tracing::instrument(
+        level = "trace",
+        skip_all,
+        target = "indexing::prefix",
+        name = "index_documents_prefix_databases"
+    )]
     pub fn execute_prefix_databases(
         self,
         word_docids: Option<grenad::Reader<CursorClonableMmap>>,
@@ -598,6 +620,8 @@ where
         let del_prefix_fst_words;
 
         {
+            let span = tracing::trace_span!(target: "indexing::details", "compute_prefix_diffs");
+            let _entered = span.enter();
             puffin::profile_scope!("compute_prefix_diffs");
 
             current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
@@ -722,6 +746,12 @@ where
 
 /// Run the word prefix docids update operation.
 #[allow(clippy::too_many_arguments)]
+#[tracing::instrument(
+    level = "trace",
+    skip_all,
+    target = "indexing::prefix",
+    name = "index_documents_word_prefix_docids"
+)]
 fn execute_word_prefix_docids(
     txn: &mut heed::RwTxn,
     reader: grenad::Reader<CursorClonableMmap>,
diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs
index ab8e27edb..e5392092f 100644
--- a/milli/src/update/index_documents/transform.rs
+++ b/milli/src/update/index_documents/transform.rs
@@ -146,7 +146,7 @@ impl<'a, 'i> Transform<'a, 'i> {
         })
     }
 
-    #[logging_timer::time]
+    #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
     pub fn read_documents<R, FP, FA>(
         &mut self,
         reader: EnrichedDocumentsBatchReader<R>,
@@ -359,7 +359,7 @@ impl<'a, 'i> Transform<'a, 'i> {
     /// - If the document to remove was inserted by the `read_documents` method before but was NOT present in the db,
     ///   it's added into the grenad to ensure we don't insert it + removed from the list of new documents ids.
     /// - If the document to remove was not present in either the db or the transform we do nothing.
-    #[logging_timer::time]
+    #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
     pub fn remove_documents<FA>(
         &mut self,
         mut to_remove: Vec<String>,
@@ -450,7 +450,7 @@ impl<'a, 'i> Transform<'a, 'i> {
     /// - No batching using the standards `remove_documents` and `add_documents` took place
     ///
     /// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function.
-    #[logging_timer::time]
+    #[tracing::instrument(level = "trace", skip_all, target = "indexing::details")]
    pub fn remove_documents_from_db_no_batch<FA>(
        &mut self,
        to_remove: &RoaringBitmap,
@@ -541,6 +541,7 @@ impl<'a, 'i> Transform<'a, 'i> {
 
     // Flatten a document from the fields ids map contained in self and insert the new
     // created fields. Returns `None` if the document doesn't need to be flattened.
+    #[tracing::instrument(level = "trace", skip(self, obkv), target = "indexing::transform")]
     fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Option<Vec<u8>>> {
         if obkv
             .iter()
@@ -661,7 +662,7 @@ impl<'a, 'i> Transform<'a, 'i> {
     /// Generate the `TransformOutput` based on the given sorter that can be generated from any
     /// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document
     /// id for the user side and the value must be an obkv where keys are valid fields ids.
-    #[logging_timer::time]
+    #[tracing::instrument(level = "trace", skip_all, target = "indexing::transform")]
     pub(crate) fn output_from_sorter<F>(
         self,
         wtxn: &mut heed::RwTxn,
diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs
index 58cb9bb02..7db0279ba 100644
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@@ -115,6 +115,7 @@ impl TypedChunk {
 
 /// Write typed chunk in the corresponding LMDB database of the provided index.
 /// Return new documents seen.
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
 pub(crate) fn write_typed_chunk_into_index(
     typed_chunk: TypedChunk,
     index: &Index,
@@ -126,6 +127,8 @@ pub(crate) fn write_typed_chunk_into_index(
     let mut is_merged_database = false;
     match typed_chunk {
         TypedChunk::Documents(obkv_documents_iter) => {
+            let span = tracing::trace_span!(target: "indexing::write_db", "documents");
+            let _entered = span.enter();
             let mut operations: Vec<DocumentOperation> = Default::default();
 
             let mut docids = index.documents_ids(wtxn)?;
@@ -172,6 +175,9 @@ pub(crate) fn write_typed_chunk_into_index(
             index.put_documents_ids(wtxn, &docids)?;
         }
         TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => {
+            let span =
+                tracing::trace_span!(target: "indexing::write_db", "field_id_word_count_docids");
+            let _entered = span.enter();
             append_entries_into_database(
                 fid_word_count_docids_iter,
                 &index.field_id_word_count_docids,
@@ -187,6 +193,8 @@ pub(crate) fn write_typed_chunk_into_index(
             exact_word_docids_reader,
             word_fid_docids_reader,
         } => {
+            let span = tracing::trace_span!(target: "indexing::write_db", "word_docids");
+            let _entered = span.enter();
             let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_reader) }?;
             append_entries_into_database(
                 word_docids_iter.clone(),
@@ -230,6 +238,8 @@ pub(crate) fn write_typed_chunk_into_index(
             is_merged_database = true;
         }
         TypedChunk::WordPositionDocids(word_position_docids_iter) => {
+            let span = tracing::trace_span!(target: "indexing::write_db", "word_position_docids");
+            let _entered = span.enter();
             append_entries_into_database(
                 word_position_docids_iter,
                 &index.word_position_docids,
@@ -241,16 +251,25 @@ pub(crate) fn write_typed_chunk_into_index(
             is_merged_database = true;
         }
         TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => {
+            let span =
+                tracing::trace_span!(target: "indexing::write_db", "field_id_facet_number_docids");
+            let _entered = span.enter();
             let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter);
             indexer.execute(wtxn)?;
             is_merged_database = true;
         }
         TypedChunk::FieldIdFacetStringDocids(facet_id_string_docids_iter) => {
+            let span =
+                tracing::trace_span!(target: "indexing::write_db", "field_id_facet_string_docids");
+            let _entered = span.enter();
             let indexer = FacetsUpdate::new(index, FacetType::String, facet_id_string_docids_iter);
             indexer.execute(wtxn)?;
             is_merged_database = true;
         }
         TypedChunk::FieldIdFacetExistsDocids(facet_id_exists_docids) => {
+            let span =
+                tracing::trace_span!(target: "indexing::write_db", "field_id_facet_exists_docids");
+            let _entered = span.enter();
             append_entries_into_database(
                 facet_id_exists_docids,
                 &index.facet_id_exists_docids,
@@ -262,6 +281,9 @@ pub(crate) fn write_typed_chunk_into_index(
             is_merged_database = true;
         }
         TypedChunk::FieldIdFacetIsNullDocids(facet_id_is_null_docids) => {
+            let span =
+                tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_null_docids");
+            let _entered = span.enter();
             append_entries_into_database(
                 facet_id_is_null_docids,
                 &index.facet_id_is_null_docids,
@@ -273,6 +295,8 @@ pub(crate) fn write_typed_chunk_into_index(
             is_merged_database = true;
         }
         TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => {
+            let span = tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_empty_docids");
+            let _entered = span.enter();
             append_entries_into_database(
                 facet_id_is_empty_docids,
                 &index.facet_id_is_empty_docids,
@@ -284,6 +308,9 @@ pub(crate) fn write_typed_chunk_into_index(
             is_merged_database = true;
         }
         TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => {
+            let span =
+                tracing::trace_span!(target: "indexing::write_db", "word_pair_proximity_docids");
+            let _entered = span.enter();
             append_entries_into_database(
                 word_pair_proximity_docids_iter,
                 &index.word_pair_proximity_docids,
@@ -295,6 +322,9 @@ pub(crate) fn write_typed_chunk_into_index(
             is_merged_database = true;
         }
         TypedChunk::FieldIdDocidFacetNumbers(fid_docid_facet_number) => {
+            let span =
+                tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_numbers");
+            let _entered = span.enter();
             let index_fid_docid_facet_numbers =
                 index.field_id_docid_facet_f64s.remap_types::<Bytes, Bytes>();
             let mut cursor = fid_docid_facet_number.into_cursor()?;
@@ -315,6 +345,9 @@ pub(crate) fn write_typed_chunk_into_index(
             }
         }
         TypedChunk::FieldIdDocidFacetStrings(fid_docid_facet_string) => {
+            let span =
+                tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_strings");
+            let _entered = span.enter();
             let index_fid_docid_facet_strings =
                 index.field_id_docid_facet_strings.remap_types::<Bytes, Bytes>();
             let mut cursor = fid_docid_facet_string.into_cursor()?;
@@ -335,6 +368,8 @@ pub(crate) fn write_typed_chunk_into_index(
             }
         }
         TypedChunk::GeoPoints(geo_points) => {
+            let span = tracing::trace_span!(target: "indexing::write_db", "geo_points");
+            let _entered = span.enter();
             let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
             let mut geo_faceted_docids = index.geo_faceted_documents_ids(wtxn)?;
 
@@ -365,6 +400,8 @@ pub(crate) fn write_typed_chunk_into_index(
             expected_dimension,
             embedder_name,
         } => {
+            let span = tracing::trace_span!(target: "indexing::write_db", "vector_points");
+            let _entered = span.enter();
             let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
                 InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
             )?;
@@ -483,6 +520,8 @@ pub(crate) fn write_typed_chunk_into_index(
             log::debug!("Finished vector chunk for {}", embedder_name);
         }
         TypedChunk::ScriptLanguageDocids(sl_map) => {
+            let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids");
+            let _entered = span.enter();
             for (key, (deletion, addition)) in sl_map {
                 let mut db_key_exists = false;
                 let final_value = match index.script_language_docids.get(wtxn, &key)? {
@@ -536,6 +575,7 @@ fn merge_word_docids_reader_into_fst(
 
 /// Write provided entries in database using serialize_value function.
 /// merge_values function is used if an entry already exist in the database.
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
 fn write_entries_into_database<R, K, V, FS, FM>(
     data: grenad::Reader<R>,
     database: &heed::Database<K, V>,
@@ -582,6 +622,7 @@ where
 /// merge_values function is used if an entry already exist in the database.
 /// All provided entries must be ordered.
 /// If the index is not empty, write_entries_into_database is called instead.
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
 fn append_entries_into_database<R, K, V, FS, FM>(
     data: grenad::Reader<R>,
     database: &heed::Database<K, V>,
diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index a3ba42119..3cad79467 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -372,6 +372,11 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         self.embedder_settings = Setting::Reset;
     }
 
+    #[tracing::instrument(
+        level = "trace",
+        skip(self, progress_callback, should_abort, old_fields_ids_map),
+        target = "indexing::documents"
+    )]
     fn reindex<FP, FA>(
         &mut self,
         progress_callback: &FP,
diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs
index 544bea224..99c6c815e 100644
--- a/milli/src/update/word_prefix_docids.rs
+++ b/milli/src/update/word_prefix_docids.rs
@@ -39,7 +39,12 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
         }
     }
 
-    #[logging_timer::time("WordPrefixDocids::{}")]
+    #[tracing::instrument(
+        level = "trace",
+        skip_all,
+        target = "indexing::prefix",
+        name = "word_prefix_docids"
+    )]
     pub fn execute(
         self,
         mut new_word_docids_iter: grenad::ReaderCursor<CursorClonableMmap>,
diff --git a/milli/src/update/words_prefix_integer_docids.rs b/milli/src/update/words_prefix_integer_docids.rs
index 819cc097b..23a676bc8 100644
--- a/milli/src/update/words_prefix_integer_docids.rs
+++ b/milli/src/update/words_prefix_integer_docids.rs
@@ -44,7 +44,12 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
         }
     }
 
-    #[logging_timer::time("WordPrefixIntegerDocids::{}")]
+    #[tracing::instrument(
+        level = "trace",
+        skip_all,
+        target = "indexing::prefix",
+        name = "words_prefix_integer_docids"
+    )]
     pub fn execute(
         self,
         new_word_integer_docids: grenad::Reader<CursorClonableMmap>,
diff --git a/milli/src/update/words_prefixes_fst.rs b/milli/src/update/words_prefixes_fst.rs
index f26bf93e5..bb1830727 100644
--- a/milli/src/update/words_prefixes_fst.rs
+++ b/milli/src/update/words_prefixes_fst.rs
@@ -38,7 +38,12 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
         self
     }
 
-    #[logging_timer::time("WordsPrefixesFst::{}")]
+    #[tracing::instrument(
+        level = "trace",
+        skip_all,
+        target = "indexing::prefix",
+        name = "words_prefix_fst"
+    )]
    pub fn execute(self) -> Result<()> {
        puffin::profile_function!();
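
Note for reviewers, not part of the patch: the spans above are only emitted if the embedding binary installs a tracing subscriber. A minimal sketch of such a setup, assuming a test binary that depends on the tracing-subscriber crate with its env-filter feature enabled (neither dependency is added by this diff):

    use tracing_subscriber::fmt::format::FmtSpan;
    use tracing_subscriber::EnvFilter;

    fn main() {
        // The directive "indexing=trace" matches every target used in this
        // patch by prefix: indexing::documents, indexing::extract,
        // indexing::details, indexing::grenad, indexing::transform,
        // indexing::prefix and indexing::write_db.
        tracing_subscriber::fmt()
            .with_env_filter(EnvFilter::new("indexing=trace"))
            // Logging span close events recovers the per-function timings
            // that the removed logging_timer attributes used to print.
            .with_span_events(FmtSpan::CLOSE)
            .init();

        // ... open a milli Index and run an indexing operation here ...
    }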