diff --git a/Cargo.lock b/Cargo.lock
index 156917462..008f18a16 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2464,7 +2464,6 @@ dependencies = [
  "meilisearch-auth",
  "meilisearch-types",
  "page_size 0.5.0",
- "puffin",
  "rayon",
  "roaring",
  "serde",
@@ -3231,12 +3230,6 @@ version = "0.4.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
 
-[[package]]
-name = "lz4_flex"
-version = "0.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b8c72594ac26bfd34f2d99dfced2edfaddfe8a476e3ff2ca0eb293d925c4f83"
-
 [[package]]
 name = "macro_rules_attribute"
 version = "0.2.0"
@@ -3341,7 +3334,6 @@ dependencies = [
  "pin-project-lite",
  "platform-dirs",
  "prometheus",
- "puffin",
  "rand",
  "rayon",
  "regex",
@@ -3509,7 +3501,6 @@ dependencies = [
  "obkv",
  "once_cell",
  "ordered-float",
- "puffin",
  "rand",
  "rand_pcg",
  "rayon",
@@ -4180,23 +4171,6 @@ version = "2.28.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
 
-[[package]]
-name = "puffin"
-version = "0.16.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76425abd4e1a0ad4bd6995dd974b52f414fca9974171df8e3708b3e660d05a21"
-dependencies = [
- "anyhow",
- "bincode",
- "byteorder",
- "cfg-if",
- "instant",
- "lz4_flex",
- "once_cell",
- "parking_lot",
- "serde",
-]
-
 [[package]]
 name = "pulp"
 version = "0.18.9"
diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml
index 4b6c0a36d..21fa34733 100644
--- a/index-scheduler/Cargo.toml
+++ b/index-scheduler/Cargo.toml
@@ -22,7 +22,6 @@ flate2 = "1.0.28"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 page_size = "0.5.0"
-puffin = { version = "0.16.0", features = ["serialization"] }
 rayon = "1.8.1"
 roaring = { version = "0.10.2", features = ["serde"] }
 serde = { version = "1.0.195", features = ["derive"] }
diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs
index d10f83a0a..181ac49a3 100644
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -529,8 +529,6 @@ impl IndexScheduler {
         #[cfg(test)]
         self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
 
-        puffin::profile_function!();
-
         let enqueued = &self.get_status(rtxn, Status::Enqueued)?;
 
         let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
@@ -639,8 +637,6 @@ impl IndexScheduler {
             self.breakpoint(crate::Breakpoint::InsideProcessBatch);
         }
 
-        puffin::profile_function!(batch.to_string());
-
         match batch {
             Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
                 // 1. Retrieve the tasks that matched the query at enqueue-time.
@@ -1226,8 +1222,6 @@ impl IndexScheduler {
         index: &'i Index,
         operation: IndexOperation,
     ) -> Result<Vec<Task>> {
-        puffin::profile_function!();
-
         match operation {
             IndexOperation::DocumentClear { mut tasks, .. } => {
                 let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;
diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs
index 3be18a3f1..ae8e6728a 100644
--- a/index-scheduler/src/features.rs
+++ b/index-scheduler/src/features.rs
@@ -68,19 +68,6 @@ impl RoFeatures {
             .into())
         }
     }
-
-    pub fn check_puffin(&self) -> Result<()> {
-        if self.runtime.export_puffin_reports {
-            Ok(())
-        } else {
-            Err(FeatureNotEnabledError {
-                disabled_action: "Outputting Puffin reports to disk",
-                feature: "export puffin reports",
-                issue_link: "https://github.com/meilisearch/product/discussions/693",
-            }
-            .into())
-        }
-    }
 }
 
 impl FeatureData {
diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs
index 988e75b81..d8625a2c7 100644
--- a/index-scheduler/src/insta_snapshot.rs
+++ b/index-scheduler/src/insta_snapshot.rs
@@ -32,7 +32,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
         features: _,
         max_number_of_tasks: _,
         max_number_of_batched_tasks: _,
-        puffin_frame: _,
         wake_up: _,
         dumps_path: _,
         snapshots_path: _,
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index e4c9cd08f..8a1c2f540 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -33,7 +33,6 @@ pub type Result = std::result::Result;
 pub type TaskId = u32;
 
 use std::collections::{BTreeMap, HashMap};
-use std::fs::File;
 use std::io::{self, BufReader, Read};
 use std::ops::{Bound, RangeBounds};
 use std::path::{Path, PathBuf};
@@ -59,7 +58,6 @@ use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfi
 use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
 use meilisearch_types::task_view::TaskView;
 use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
-use puffin::FrameView;
 use rayon::current_num_threads;
 use rayon::prelude::{IntoParallelIterator, ParallelIterator};
 use roaring::RoaringBitmap;
@@ -344,9 +342,6 @@ pub struct IndexScheduler {
     /// The Authorization header to send to the webhook URL.
     pub(crate) webhook_authorization_header: Option<String>,
 
-    /// A frame to output the indexation profiling files to disk.
-    pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
-
     /// The path used to create the dumps.
     pub(crate) dumps_path: PathBuf,
 
@@ -401,7 +396,6 @@ impl IndexScheduler {
             cleanup_enabled: self.cleanup_enabled,
             max_number_of_tasks: self.max_number_of_tasks,
             max_number_of_batched_tasks: self.max_number_of_batched_tasks,
-            puffin_frame: self.puffin_frame.clone(),
             snapshots_path: self.snapshots_path.clone(),
             dumps_path: self.dumps_path.clone(),
             auth_path: self.auth_path.clone(),
@@ -500,7 +494,6 @@ impl IndexScheduler {
             env,
             // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
             wake_up: Arc::new(SignalEvent::auto(true)),
-            puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
             autobatching_enabled: options.autobatching_enabled,
             cleanup_enabled: options.cleanup_enabled,
             max_number_of_tasks: options.max_number_of_tasks,
@@ -621,10 +614,6 @@ impl IndexScheduler {
                 run.wake_up.wait();
 
                 loop {
-                    let puffin_enabled = run.features().check_puffin().is_ok();
-                    puffin::set_scopes_on(puffin_enabled);
-                    puffin::GlobalProfiler::lock().new_frame();
-
                     match run.tick() {
                         Ok(TickOutcome::TickAgain(_)) => (),
                         Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
@@ -636,31 +625,6 @@ impl IndexScheduler {
                         }
                     }
                 }
-
-                // Let's write the previous frame to disk but only if
-                // the user wanted to profile with puffin.
-                if puffin_enabled {
-                    let mut frame_view = run.puffin_frame.lock();
-                    if !frame_view.is_empty() {
-                        let now = OffsetDateTime::now_utc();
-                        let mut file = match File::create(format!("{}.puffin", now)) {
-                            Ok(file) => file,
-                            Err(e) => {
-                                tracing::error!("{e}");
-                                continue;
-                            }
-                        };
-                        if let Err(e) = frame_view.save_to_writer(&mut file) {
-                            tracing::error!("{e}");
-                        }
-                        if let Err(e) = file.sync_all() {
-                            tracing::error!("{e}");
-                        }
-                        // We erase this frame view as it is no more useful. We want to
-                        // measure the new frames now that we exported the previous ones.
-                        *frame_view = FrameView::default();
-                    }
-                }
             }
         })
         .unwrap();
diff --git a/meilisearch-types/src/features.rs b/meilisearch-types/src/features.rs
index 04a5d9d6f..dda9dee51 100644
--- a/meilisearch-types/src/features.rs
+++ b/meilisearch-types/src/features.rs
@@ -6,7 +6,6 @@ pub struct RuntimeTogglableFeatures {
     pub vector_store: bool,
     pub metrics: bool,
     pub logs_route: bool,
-    pub export_puffin_reports: bool,
 }
 
 #[derive(Default, Debug, Clone, Copy)]
diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml
index ed62c5f48..75962c450 100644
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@@ -67,7 +67,6 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
 pin-project-lite = "0.2.13"
 platform-dirs = "0.3.0"
 prometheus = { version = "0.13.3", features = ["process"] }
-puffin = { version = "0.16.0", features = ["serialization"] }
 rand = "0.8.5"
 rayon = "1.8.0"
 regex = "1.10.2"
diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs
index 227b485c5..0e02309fa 100644
--- a/meilisearch/src/routes/features.rs
+++ b/meilisearch/src/routes/features.rs
@@ -47,8 +47,6 @@ pub struct RuntimeTogglableFeatures {
     pub metrics: Option<bool>,
     #[deserr(default)]
     pub logs_route: Option<bool>,
-    #[deserr(default)]
-    pub export_puffin_reports: Option<bool>,
 }
 
 async fn patch_features(
@@ -68,21 +66,13 @@ async fn patch_features(
         vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
         metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
         logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
-        export_puffin_reports: new_features
-            .0
-            .export_puffin_reports
-            .unwrap_or(old_features.export_puffin_reports),
     };
 
     // explicitly destructure for analytics rather than using the `Serialize` implementation, because
     // the it renames to camelCase, which we don't want for analytics.
     // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
-    let meilisearch_types::features::RuntimeTogglableFeatures {
-        vector_store,
-        metrics,
-        logs_route,
-        export_puffin_reports,
-    } = new_features;
+    let meilisearch_types::features::RuntimeTogglableFeatures { vector_store, metrics, logs_route } =
+        new_features;
 
     analytics.publish(
         "Experimental features Updated".to_string(),
@@ -90,7 +80,6 @@ async fn patch_features(
             "vector_store": vector_store,
             "metrics": metrics,
             "logs_route": logs_route,
-            "export_puffin_reports": export_puffin_reports,
         }),
         Some(&req),
     );
diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs
index 1a31437f8..c8f8ca105 100644
--- a/meilisearch/tests/dumps/mod.rs
+++ b/meilisearch/tests/dumps/mod.rs
@@ -1859,8 +1859,7 @@ async fn import_dump_v6_containing_experimental_features() {
     {
       "vectorStore": false,
       "metrics": false,
-      "logsRoute": false,
-      "exportPuffinReports": false
+      "logsRoute": false
     }
     "###);
 
diff --git a/meilisearch/tests/features/mod.rs b/meilisearch/tests/features/mod.rs
index 3a9812f30..9548567ff 100644
--- a/meilisearch/tests/features/mod.rs
+++ b/meilisearch/tests/features/mod.rs
@@ -20,8 +20,7 @@ async fn experimental_features() {
     {
       "vectorStore": false,
       "metrics": false,
-      "logsRoute": false,
-      "exportPuffinReports": false
+      "logsRoute": false
     }
     "###);
 
@@ -32,8 +31,7 @@
     {
       "vectorStore": true,
       "metrics": false,
-      "logsRoute": false,
-      "exportPuffinReports": false
+      "logsRoute": false
     }
     "###);
 
@@ -44,8 +42,7 @@
     {
       "vectorStore": true,
       "metrics": false,
-      "logsRoute": false,
-      "exportPuffinReports": false
+      "logsRoute": false
     }
     "###);
 
@@ -57,8 +54,7 @@
     {
       "vectorStore": true,
       "metrics": false,
-      "logsRoute": false,
-      "exportPuffinReports": false
+      "logsRoute": false
     }
     "###);
 
@@ -70,8 +66,7 @@
     {
       "vectorStore": true,
       "metrics": false,
-      "logsRoute": false,
-      "exportPuffinReports": false
+      "logsRoute": false
     }
     "###);
 }
@@ -90,8 +85,7 @@ async fn experimental_feature_metrics() {
     {
       "vectorStore": false,
       "metrics": true,
-      "logsRoute": false,
-      "exportPuffinReports": false
+      "logsRoute": false
     }
     "###);
 
@@ -146,7 +140,7 @@ async fn errors() {
     meili_snap::snapshot!(code, @"400 Bad Request");
     meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
     {
-      "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`",
+      "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`",
       "code": "bad_request",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#bad_request"
diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs
index 028b341cb..9c50df6e1 100644
--- a/meilisearch/tests/search/hybrid.rs
+++ b/meilisearch/tests/search/hybrid.rs
@@ -18,8 +18,7 @@ async fn index_with_documents_user_provided<'a>(
     {
       "vectorStore": true,
       "metrics": false,
-      "logsRoute": false,
-      "exportPuffinReports": false
+      "logsRoute": false
     }
     "###);
 
@@ -47,8 +46,7 @@ async fn index_with_documents_hf<'a>(server: &'a Server, documents: &Value) -> I
     {
       "vectorStore": true,
       "metrics": false,
-      "logsRoute": false,
-      "exportPuffinReports": false
+      "logsRoute": false
     }
     "###);
 
diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs
index cd31d4959..379e0a917 100644
--- a/meilisearch/tests/settings/get_settings.rs
+++ b/meilisearch/tests/settings/get_settings.rs
@@ -98,8 +98,7 @@ async fn secrets_are_hidden_in_settings() {
     {
       "vectorStore": true,
       "metrics": false,
-      "logsRoute": false,
-      "exportPuffinReports": false
+      "logsRoute": false
     }
     "###);
 
diff --git a/milli/Cargo.toml b/milli/Cargo.toml
index c5dddd0fd..4a08e6261 100644
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -67,9 +67,6 @@ filter-parser = { path = "../filter-parser" }
 # documents words self-join
 itertools = "0.11.0"
 
-# profiling
-puffin = "0.16.0"
-
 csv = "1.3.0"
 candle-core = { version = "0.4.1" }
 candle-transformers = { version = "0.4.1" }
diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs
index 6715939dc..3490b55e4 100644
--- a/milli/src/update/clear_documents.rs
+++ b/milli/src/update/clear_documents.rs
@@ -21,8 +21,6 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
         name = "clear_documents"
     )]
     pub fn execute(self) -> Result<u64> {
-        puffin::profile_function!();
-
         self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
         let Index {
             env: _env,
diff --git a/milli/src/update/index_documents/enrich.rs b/milli/src/update/index_documents/enrich.rs
index 162136912..2da717bb0 100644
--- a/milli/src/update/index_documents/enrich.rs
+++ b/milli/src/update/index_documents/enrich.rs
@@ -29,8 +29,6 @@ pub fn enrich_documents_batch(
     autogenerate_docids: bool,
     reader: DocumentsBatchReader<R>,
 ) -> Result<StdResult<EnrichedDocumentsBatchReader<R>, UserError>> {
-    puffin::profile_function!();
-
     let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();
     let mut external_ids = tempfile::tempfile().map(BufWriter::new).map(grenad::Writer::new)?;
 
diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
index d97b6639e..9c557de81 100644
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -29,8 +29,6 @@ pub fn extract_docid_word_positions(
     settings_diff: &InnerIndexSettingsDiff,
     max_positions_per_attributes: Option<u32>,
 ) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
-    puffin::profile_function!();
-
     let max_positions_per_attributes = max_positions_per_attributes
         .map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
     let max_memory = indexer.max_memory_by_thread();
diff --git a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs
index 1848a085f..bfd769604 100644
--- a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs
@@ -23,8 +23,6 @@ pub fn extract_facet_number_docids(
     indexer: GrenadParameters,
     _settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<grenad::Reader<BufReader<File>>> {
-    puffin::profile_function!();
-
     let max_memory = indexer.max_memory_by_thread();
 
     let mut facet_number_docids_sorter = create_sorter(
diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
index abffe17ab..3deace127 100644
--- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
@@ -28,8 +28,6 @@ pub fn extract_facet_string_docids(
     indexer: GrenadParameters,
     _settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
-    puffin::profile_function!();
-
     let max_memory = indexer.max_memory_by_thread();
 
     let options = NormalizerOption { lossy: true, ..Default::default() };
diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
index 123c3b123..a2b060255 100644
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@@ -47,8 +47,6 @@ pub fn extract_fid_docid_facet_values(
     settings_diff: &InnerIndexSettingsDiff,
     geo_fields_ids: Option<(FieldId, FieldId)>,
 ) -> Result<ExtractedFacetValues> {
-    puffin::profile_function!();
-
     let max_memory = indexer.max_memory_by_thread();
 
     let mut fid_docid_facet_numbers_sorter = create_sorter(
diff --git a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
index 51e0642da..f252df1cd 100644
--- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
@@ -26,8 +26,6 @@ pub fn extract_fid_word_count_docids(
     indexer: GrenadParameters,
     _settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<grenad::Reader<BufReader<File>>> {
-    puffin::profile_function!();
-
     let max_memory = indexer.max_memory_by_thread();
 
     let mut fid_word_count_docids_sorter = create_sorter(
diff --git a/milli/src/update/index_documents/extract/extract_geo_points.rs b/milli/src/update/index_documents/extract/extract_geo_points.rs
index cfcc021c6..3d7463fba 100644
--- a/milli/src/update/index_documents/extract/extract_geo_points.rs
+++ b/milli/src/update/index_documents/extract/extract_geo_points.rs
@@ -20,8 +20,6 @@ pub fn extract_geo_points(
     primary_key_id: FieldId,
     (lat_fid, lng_fid): (FieldId, FieldId),
 ) -> Result<grenad::Reader<BufReader<File>>> {
-    puffin::profile_function!();
-
     let mut writer = create_writer(
         indexer.chunk_compression_type,
         indexer.chunk_compression_level,
diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs
index 724d9ea81..76ec90d65 100644
--- a/milli/src/update/index_documents/extract/extract_vector_points.rs
+++ b/milli/src/update/index_documents/extract/extract_vector_points.rs
@@ -91,8 +91,6 @@ pub fn extract_vector_points(
     indexer: GrenadParameters,
     settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<Vec<ExtractedVectorPoints>> {
-    puffin::profile_function!();
-
     let reindex_vectors = settings_diff.reindex_vectors();
 
     let old_fields_ids_map = &settings_diff.old.fields_ids_map;
@@ -295,7 +293,6 @@ fn push_vectors_diff(
     delta: VectorStateDelta,
     reindex_vectors: bool,
 ) -> Result<()> {
-    puffin::profile_function!();
     let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values();
     if must_remove
         // TODO: the below condition works because we erase the vec database when a embedding setting changes.
@@ -367,7 +364,6 @@ pub fn extract_embeddings(
     embedder: Arc<Embedder>,
     request_threads: &ThreadPoolNoAbort,
 ) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
-    puffin::profile_function!();
     let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism
     let n_vectors_per_chunk = embedder.prompt_count_in_chunk_hint(); // number of vectors in a single chunk
 
diff --git a/milli/src/update/index_documents/extract/extract_word_docids.rs b/milli/src/update/index_documents/extract/extract_word_docids.rs
index 5699f2fb6..457d2359e 100644
--- a/milli/src/update/index_documents/extract/extract_word_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_docids.rs
@@ -36,8 +36,6 @@ pub fn extract_word_docids(
     grenad::Reader<BufReader<File>>,
     grenad::Reader<BufReader<File>>,
 )> {
-    puffin::profile_function!();
-
     let max_memory = indexer.max_memory_by_thread();
 
     let mut word_fid_docids_sorter = create_sorter(
@@ -167,8 +165,6 @@ fn words_into_sorter(
     add_words: &BTreeSet<Vec<u8>>,
     word_fid_docids_sorter: &mut grenad::Sorter<MergeFn>,
 ) -> Result<()> {
-    puffin::profile_function!();
-
     use itertools::merge_join_by;
     use itertools::EitherOrBoth::{Both, Left, Right};
 
diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
index 23f70ccd2..617338f9f 100644
--- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
@@ -26,7 +26,6 @@ pub fn extract_word_pair_proximity_docids(
     indexer: GrenadParameters,
     settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<grenad::Reader<BufReader<File>>> {
-    puffin::profile_function!();
     let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord;
     let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord;
 
@@ -71,8 +70,6 @@ pub fn extract_word_pair_proximity_docids(
 
         // if we change document, we fill the sorter
         if current_document_id.map_or(false, |id| id != document_id) {
-            puffin::profile_scope!("Document into sorter");
-
             // FIXME: span inside of a hot loop might degrade performance and create big reports
             let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter");
             let _entered = span.enter();
@@ -163,7 +160,6 @@ pub fn extract_word_pair_proximity_docids(
     }
 
     if let Some(document_id) = current_document_id {
-        puffin::profile_scope!("Final document into sorter");
         // FIXME: span inside of a hot loop might degrade performance and create big reports
         let span = tracing::trace_span!(target: "indexing::details", "final_document_into_sorter");
         let _entered = span.enter();
@@ -176,7 +172,6 @@ pub fn extract_word_pair_proximity_docids(
         )?;
     }
     {
-        puffin::profile_scope!("sorter_into_reader");
         // FIXME: span inside of a hot loop might degrade performance and create big reports
         let span = tracing::trace_span!(target: "indexing::details", "sorter_into_reader");
         let _entered = span.enter();
diff --git a/milli/src/update/index_documents/extract/extract_word_position_docids.rs b/milli/src/update/index_documents/extract/extract_word_position_docids.rs
index 45a05b0d0..50b1617f9 100644
--- a/milli/src/update/index_documents/extract/extract_word_position_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_position_docids.rs
@@ -25,8 +25,6 @@ pub fn extract_word_position_docids(
     indexer: GrenadParameters,
     _settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<grenad::Reader<BufReader<File>>> {
-    puffin::profile_function!();
-
     let max_memory = indexer.max_memory_by_thread();
 
     let mut word_position_docids_sorter = create_sorter(
@@ -104,8 +102,6 @@ fn words_position_into_sorter(
     add_word_positions: &BTreeSet<(u16, Vec<u8>)>,
     word_position_docids_sorter: &mut grenad::Sorter<MergeFn>,
 ) -> Result<()> {
-    puffin::profile_function!();
-
     use itertools::merge_join_by;
     use itertools::EitherOrBoth::{Both, Left, Right};
 
diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs
index 7598c8094..90723bc4a 100644
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -47,8 +47,6 @@ pub(crate) fn data_from_obkv_documents(
     settings_diff: Arc<InnerIndexSettingsDiff>,
     max_positions_per_attributes: Option<u32>,
 ) -> Result<()> {
-    puffin::profile_function!();
-
     let (original_pipeline_result, flattened_pipeline_result): (Result<_>, Result<_>) = rayon::join(
         || {
             original_obkv_chunks
@@ -90,7 +88,6 @@ pub(crate) fn data_from_obkv_documents(
                 lmdb_writer_sx.clone(),
                 extract_fid_word_count_docids,
                 TypedChunk::FieldIdWordCountDocids,
-                "field-id-wordcount-docids",
             );
             run_extraction_task::<
                 _,
@@ -117,7 +114,6 @@ pub(crate) fn data_from_obkv_documents(
                         word_fid_docids_reader,
                     }
                 },
-                "word-docids",
             );
 
             run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
@@ -127,7 +123,6 @@ pub(crate) fn data_from_obkv_documents(
                 lmdb_writer_sx.clone(),
                 extract_word_position_docids,
                 TypedChunk::WordPositionDocids,
-                "word-position-docids",
             );
 
             run_extraction_task::<
@@ -141,7 +136,6 @@ pub(crate) fn data_from_obkv_documents(
                 lmdb_writer_sx.clone(),
                 extract_facet_string_docids,
                 TypedChunk::FieldIdFacetStringDocids,
-                "field-id-facet-string-docids",
             );
 
             run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
@@ -151,7 +145,6 @@ pub(crate) fn data_from_obkv_documents(
                 lmdb_writer_sx.clone(),
                 extract_facet_number_docids,
                 TypedChunk::FieldIdFacetNumberDocids,
-                "field-id-facet-number-docids",
             );
 
             run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
@@ -161,7 +154,6 @@ pub(crate) fn data_from_obkv_documents(
                 lmdb_writer_sx.clone(),
                 extract_word_pair_proximity_docids,
                 TypedChunk::WordPairProximityDocids,
-                "word-pair-proximity-docids",
             );
         }
 
@@ -185,7 +177,6 @@ fn run_extraction_task(
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     extract_fn: FE,
     serialize_fn: FS,
-    name: &'static str,
 ) where
     FE: Fn(
         grenad::Reader<CursorClonableMmap>,
@@ -203,7 +194,7 @@ fn run_extraction_task(
     rayon::spawn(move || {
         let child_span = tracing::trace_span!(target: "indexing::extract::details", parent: &current_span, "extract_multiple_chunks");
         let _entered = child_span.enter();
-        puffin::profile_scope!("extract_multiple_chunks", name);
+
         match extract_fn(chunk, indexer, &settings_diff) {
             Ok(chunk) => {
                 let _ = lmdb_writer_sx.send(Ok(serialize_fn(chunk)));
diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/milli/src/update/index_documents/helpers/grenad_helpers.rs
index b0e3654a9..aa574024d 100644
--- a/milli/src/update/index_documents/helpers/grenad_helpers.rs
+++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs
@@ -61,7 +61,6 @@ pub fn sorter_into_reader(
     sorter: grenad::Sorter<MergeFn>,
     indexer: GrenadParameters,
 ) -> Result<grenad::Reader<BufReader<File>>> {
-    puffin::profile_function!();
     let mut writer = create_writer(
         indexer.chunk_compression_type,
         indexer.chunk_compression_level,
@@ -182,8 +181,6 @@ where
     FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
     FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
 {
-    puffin::profile_function!();
-
     let mut buffer = Vec::new();
     let database = database.remap_types::<Bytes, Bytes>();
 
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index dccfbe795..f281becd6 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -141,8 +141,6 @@ where
         mut self,
         reader: DocumentsBatchReader<R>,
     ) -> Result<(Self, StdResult<u64, UserError>)> {
-        puffin::profile_function!();
-
         // Early return when there is no document to add
         if reader.is_empty() {
             return Ok((self, Ok(0)));
@@ -187,8 +185,6 @@ where
         mut self,
         to_delete: Vec<String>,
     ) -> Result<(Self, StdResult<u64, UserError>)> {
-        puffin::profile_function!();
-
         // Early return when there is no document to add
         if to_delete.is_empty() {
             // Maintains Invariant: remove documents actually always returns Ok for the inner result
@@ -223,8 +219,6 @@ where
         mut self,
         to_delete: &RoaringBitmap,
     ) -> Result<(Self, u64)> {
-        puffin::profile_function!();
-
         // Early return when there is no document to add
         if to_delete.is_empty() {
             return Ok((self, 0));
@@ -249,8 +243,6 @@ where
         name = "index_documents"
     )]
     pub fn execute(mut self) -> Result<DocumentAdditionResult> {
-        puffin::profile_function!();
-
         if self.added_documents == 0 && self.deleted_documents == 0 {
             let number_of_documents = self.index.number_of_documents(self.wtxn)?;
             return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
@@ -279,8 +271,6 @@ where
         FP: Fn(UpdateIndexingStep) + Sync,
         FA: Fn() -> bool + Sync,
     {
-        puffin::profile_function!();
-
         let TransformOutput {
             primary_key,
             mut settings_diff,
@@ -404,7 +394,7 @@ where
         rayon::spawn(move || {
             let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "extract_and_send_grenad_chunks");
             let _enter = child_span.enter();
-            puffin::profile_scope!("extract_and_send_grenad_chunks");
+
             // split obkv file into several chunks
             let original_chunk_iter = match original_documents {
                 Some(original_documents) => {
@@ -612,8 +602,6 @@ where
         FP: Fn(UpdateIndexingStep) + Sync,
         FA: Fn() -> bool + Sync,
     {
-        puffin::profile_function!();
-
         // Merged databases are already been indexed, we start from this count;
         let mut databases_seen = MERGED_DATABASE_COUNT;
 
@@ -657,7 +645,6 @@ where
         {
             let span = tracing::trace_span!(target: "indexing::details", "compute_prefix_diffs");
             let _entered = span.enter();
-            puffin::profile_scope!("compute_prefix_diffs");
 
             current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
 
@@ -797,8 +784,6 @@ fn execute_word_prefix_docids(
     common_prefix_fst_words: &[&[String]],
     del_prefix_fst_words: &HashSet<Vec<u8>>,
 ) -> Result<()> {
-    puffin::profile_function!();
-
     let mut builder = WordPrefixDocids::new(txn, word_docids_db, word_prefix_docids_db);
     builder.chunk_compression_type = indexer_config.chunk_compression_type;
     builder.chunk_compression_level = indexer_config.chunk_compression_level;
diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs
index 733e74800..41a0a55cf 100644
--- a/milli/src/update/index_documents/transform.rs
+++ b/milli/src/update/index_documents/transform.rs
@@ -161,8 +161,6 @@ impl<'a, 'i> Transform<'a, 'i> {
         FP: Fn(UpdateIndexingStep) + Sync,
         FA: Fn() -> bool + Sync,
     {
-        puffin::profile_function!();
-
         let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();
         let external_documents_ids = self.index.external_documents_ids();
         let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?;
@@ -375,8 +373,6 @@ impl<'a, 'i> Transform<'a, 'i> {
     where
         FA: Fn() -> bool + Sync,
     {
-        puffin::profile_function!();
-
         // there may be duplicates in the documents to remove.
         to_remove.sort_unstable();
         to_remove.dedup();
@@ -466,8 +462,6 @@ impl<'a, 'i> Transform<'a, 'i> {
     where
         FA: Fn() -> bool + Sync,
     {
-        puffin::profile_function!();
-
         let mut documents_deleted = 0;
         let mut document_sorter_value_buffer = Vec::new();
         let mut document_sorter_key_buffer = Vec::new();
@@ -686,8 +680,6 @@ impl<'a, 'i> Transform<'a, 'i> {
     where
         F: Fn(UpdateIndexingStep) + Sync,
     {
-        puffin::profile_function!();
-
         let primary_key = self
             .index
             .primary_key(wtxn)?
diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs
index 2345551ab..27f760c2a 100644
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@@ -118,65 +118,6 @@ impl TypedChunk {
     }
 }
 
-impl TypedChunk {
-    pub fn to_debug_string(&self) -> String {
-        match self {
-            TypedChunk::FieldIdDocidFacetStrings(grenad) => {
-                format!("FieldIdDocidFacetStrings {{ number_of_entries: {} }}", grenad.len())
-            }
-            TypedChunk::FieldIdDocidFacetNumbers(grenad) => {
-                format!("FieldIdDocidFacetNumbers {{ number_of_entries: {} }}", grenad.len())
-            }
-            TypedChunk::Documents(grenad) => {
-                format!("Documents {{ number_of_entries: {} }}", grenad.len())
-            }
-            TypedChunk::FieldIdWordCountDocids(grenad) => {
-                format!("FieldIdWordcountDocids {{ number_of_entries: {} }}", grenad.len())
-            }
-            TypedChunk::WordDocids {
-                word_docids_reader,
-                exact_word_docids_reader,
-                word_fid_docids_reader,
-            } => format!(
-                "WordDocids {{ word_docids_reader: {}, exact_word_docids_reader: {}, word_fid_docids_reader: {} }}",
-                word_docids_reader.len(),
-                exact_word_docids_reader.len(),
-                word_fid_docids_reader.len()
-            ),
-            TypedChunk::WordPositionDocids(grenad) => {
-                format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len())
-            }
-            TypedChunk::WordPairProximityDocids(grenad) => {
-                format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len())
-            }
-            TypedChunk::FieldIdFacetStringDocids((grenad, _)) => {
-                format!("FieldIdFacetStringDocids {{ number_of_entries: {} }}", grenad.len())
-            }
-            TypedChunk::FieldIdFacetNumberDocids(grenad) => {
-                format!("FieldIdFacetNumberDocids {{ number_of_entries: {} }}", grenad.len())
-            }
-            TypedChunk::FieldIdFacetExistsDocids(grenad) => {
-                format!("FieldIdFacetExistsDocids {{ number_of_entries: {} }}", grenad.len())
-            }
-            TypedChunk::FieldIdFacetIsNullDocids(grenad) => {
-                format!("FieldIdFacetIsNullDocids {{ number_of_entries: {} }}", grenad.len())
-            }
-            TypedChunk::FieldIdFacetIsEmptyDocids(grenad) => {
-                format!("FieldIdFacetIsEmptyDocids {{ number_of_entries: {} }}", grenad.len())
-            }
-            TypedChunk::GeoPoints(grenad) => {
-                format!("GeoPoints {{ number_of_entries: {} }}", grenad.len())
-            }
-            TypedChunk::VectorPoints{ remove_vectors, manual_vectors, embeddings, expected_dimension, embedder_name } => {
-                format!("VectorPoints {{ remove_vectors: {}, manual_vectors: {}, embeddings: {}, dimension: {}, embedder_name: {} }}", remove_vectors.len(), manual_vectors.len(), embeddings.as_ref().map(|e| e.len()).unwrap_or_default(), expected_dimension, embedder_name)
-            }
-            TypedChunk::ScriptLanguageDocids(sl_map) => {
-                format!("ScriptLanguageDocids {{ number_of_entries: {} }}", sl_map.len())
-            }
-        }
-    }
-}
-
 /// Write typed chunk in the corresponding LMDB database of the provided index.
 /// Return new documents seen.
 #[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
@@ -185,8 +126,6 @@ pub(crate) fn write_typed_chunk_into_index(
     index: &Index,
     wtxn: &mut RwTxn,
 ) -> Result<(RoaringBitmap, bool)> {
-    puffin::profile_function!(typed_chunks[0].to_debug_string());
-
     let mut is_merged_database = false;
     match typed_chunks[0] {
         TypedChunk::Documents(_) => {
@@ -877,7 +816,6 @@ where
     FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
     FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
 {
-    puffin::profile_function!();
     let mut buffer = Vec::new();
     let database = database.remap_types::<Bytes, Bytes>();
 
diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index 0fd39ce77..133f0e3a8 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -398,8 +398,6 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         FP: Fn(UpdateIndexingStep) + Sync,
         FA: Fn() -> bool + Sync,
     {
-        puffin::profile_function!();
-
         // if the settings are set before any document update, we don't need to do anything, and
         // will set the primary key during the first document addition.
         if self.index.number_of_documents(self.wtxn)? == 0 {
diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs
index 1db066058..925635f80 100644
--- a/milli/src/update/word_prefix_docids.rs
+++ b/milli/src/update/word_prefix_docids.rs
@@ -52,8 +52,6 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
         common_prefix_fst_words: &[&[String]],
         del_prefix_fst_words: &HashSet<Vec<u8>>,
     ) -> Result<()> {
-        puffin::profile_function!();
-
         // It is forbidden to keep a mutable reference into the database
         // and write into it at the same time, therefore we write into another file.
         let mut prefix_docids_sorter = create_sorter(
diff --git a/milli/src/update/words_prefix_integer_docids.rs b/milli/src/update/words_prefix_integer_docids.rs
index 272d465fd..9b6aa21ae 100644
--- a/milli/src/update/words_prefix_integer_docids.rs
+++ b/milli/src/update/words_prefix_integer_docids.rs
@@ -57,7 +57,6 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
         common_prefix_fst_words: &[&[String]],
         del_prefix_fst_words: &HashSet<Vec<u8>>,
     ) -> Result<()> {
-        puffin::profile_function!();
         debug!("Computing and writing the word levels integers docids into LMDB on disk...");
 
         let mut prefix_integer_docids_sorter = create_sorter(
diff --git a/milli/src/update/words_prefixes_fst.rs b/milli/src/update/words_prefixes_fst.rs
index 8b438cef3..d47d6d14c 100644
--- a/milli/src/update/words_prefixes_fst.rs
+++ b/milli/src/update/words_prefixes_fst.rs
@@ -45,8 +45,6 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
         name = "words_prefix_fst"
     )]
     pub fn execute(self) -> Result<()> {
-        puffin::profile_function!();
-
         let words_fst = self.index.words_fst(self.wtxn)?;
 
         let mut current_prefix = vec![SmallString32::new(); self.max_prefix_length];