From c5f7893fbbcec34bfee8f2ac91474c62fc9adcee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 21 Sep 2023 16:21:21 +0200 Subject: [PATCH 01/13] Remove the puffin http dependency --- Cargo.lock | 13 ------------- meilisearch/Cargo.toml | 4 +--- meilisearch/src/main.rs | 4 +--- 3 files changed, 2 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 89f3561bc..a6b22871f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2564,7 +2564,6 @@ dependencies = [ "platform-dirs", "prometheus", "puffin", - "puffin_http", "rand", "rayon", "regex", @@ -3236,18 +3235,6 @@ dependencies = [ "serde", ] -[[package]] -name = "puffin_http" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13bffc600c35913d282ae1e96a6ffcdf36dc7a7cdb9310e0ba15914d258c8193" -dependencies = [ - "anyhow", - "crossbeam-channel", - "log", - "puffin", -] - [[package]] name = "quote" version = "1.0.32" diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index a2536a52f..35a4a4304 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -69,8 +69,7 @@ permissive-json-pointer = { path = "../permissive-json-pointer" } pin-project-lite = "0.2.9" platform-dirs = "0.3.0" prometheus = { version = "0.13.3", features = ["process"] } -puffin = "0.16.0" -puffin_http = { version = "0.13.0", optional = true } +puffin = { version = "0.16.0", features = ["serialization"] } rand = "0.8.5" rayon = "1.7.0" regex = "1.7.3" @@ -135,7 +134,6 @@ zip = { version = "0.6.4", optional = true } [features] default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"] analytics = ["segment"] -profile-with-puffin = ["dep:puffin_http"] mini-dashboard = [ "actix-web-static-files", "static-files", diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index a3905d451..b53335ed2 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -30,9 +30,7 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> { let (opt, config_read_from) = Opt::try_build()?; - #[cfg(feature = "profile-with-puffin")] - let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?; - puffin::set_scopes_on(cfg!(feature = "profile-with-puffin")); + puffin::set_scopes_on(opt.experimental_profile_with_puffin); anyhow::ensure!( !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage), From 656dadabead867912e6fabccfd95e7506dc918ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 21 Sep 2023 16:30:13 +0200 Subject: [PATCH 02/13] Expose an experimental flag to write the puffin reports to disk --- index-scheduler/src/lib.rs | 26 ++++++++++++++++++- .../src/analytics/segment_analytics.rs | 1 + meilisearch/src/lib.rs | 1 + meilisearch/src/option.rs | 11 ++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index df87d868d..2f6a6ea2b 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -33,6 +33,7 @@ pub type Result = std::result::Result; pub type TaskId = u32; use std::collections::{BTreeMap, HashMap}; +use std::fs::File; use std::ops::{Bound, RangeBounds}; use std::path::{Path, PathBuf}; use std::sync::atomic::AtomicBool; @@ -256,6 +257,8 @@ pub struct IndexSchedulerOptions { /// The maximum number of tasks stored in the task queue before starting /// to auto schedule task deletions. pub max_number_of_tasks: usize, + /// Weither we need to export indexation puffin reports. + pub profile_with_puffin: bool, /// The experimental features enabled for this instance. pub instance_features: InstanceTogglableFeatures, } @@ -314,6 +317,9 @@ pub struct IndexScheduler { /// the finished tasks automatically. pub(crate) max_number_of_tasks: usize, + /// Wether we must output indexation profiling files to disk. + pub(crate) puffin_frame: Option>, + /// The path used to create the dumps. pub(crate) dumps_path: PathBuf, @@ -364,6 +370,7 @@ impl IndexScheduler { wake_up: self.wake_up.clone(), autobatching_enabled: self.autobatching_enabled, max_number_of_tasks: self.max_number_of_tasks, + puffin_frame: self.puffin_frame.clone(), snapshots_path: self.snapshots_path.clone(), dumps_path: self.dumps_path.clone(), auth_path: self.auth_path.clone(), @@ -457,6 +464,9 @@ impl IndexScheduler { env, // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things wake_up: Arc::new(SignalEvent::auto(true)), + puffin_frame: options + .profile_with_puffin + .then(|| Arc::new(puffin::GlobalFrameView::default())), autobatching_enabled: options.autobatching_enabled, max_number_of_tasks: options.max_number_of_tasks, dumps_path: options.dumps_path, @@ -1070,7 +1080,21 @@ impl IndexScheduler { let batch = match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? { Some(batch) => batch, - None => return Ok(TickOutcome::WaitForSignal), + None => { + // Let's write the previous save to disk but only if + // the user wanted to profile with puffin. + if let Some(global_frame_view) = &self.puffin_frame { + let frame_view = global_frame_view.lock(); + if !frame_view.is_empty() { + let mut file = + File::create(format!("{}.puffin", OffsetDateTime::now_utc()))?; + frame_view.save_to_writer(&mut file)?; + file.sync_all()?; + } + } + + return Ok(TickOutcome::WaitForSignal); + } }; let index_uid = batch.index_uid().map(ToOwned::to_owned); drop(rtxn); diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 2f0014ab7..d7869525e 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -285,6 +285,7 @@ impl From for Infos { db_path, experimental_enable_metrics, experimental_reduce_indexing_memory_usage, + experimental_profile_with_puffin: _, http_addr, master_key: _, env, diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index ef821f49f..cf00789b3 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -237,6 +237,7 @@ fn open_or_create_database_unchecked( indexer_config: (&opt.indexer_options).try_into()?, autobatching_enabled: true, max_number_of_tasks: 1_000_000, + profile_with_puffin: opt.experimental_profile_with_puffin, index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize, index_count: DEFAULT_INDEX_COUNT, instance_features, diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index b8489c3e3..569497b9f 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -51,6 +51,7 @@ const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE"; +const MEILI_EXPERIMENTAL_PROFILE_WITH_PUFFIN: &str = "MEILI_EXPERIMENTAL_PROFILE_WITH_PUFFIN"; const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml"; const DEFAULT_DB_PATH: &str = "./data.ms"; @@ -301,6 +302,11 @@ pub struct Opt { #[serde(default)] pub experimental_reduce_indexing_memory_usage: bool, + /// Experimental flag to export puffin reports, see: + #[clap(long, env = MEILI_EXPERIMENTAL_PROFILE_WITH_PUFFIN)] + #[serde(default)] + pub experimental_profile_with_puffin: bool, + #[serde(flatten)] #[clap(flatten)] pub indexer_options: IndexerOpts, @@ -394,6 +400,7 @@ impl Opt { no_analytics, experimental_enable_metrics: enable_metrics_route, experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage, + experimental_profile_with_puffin, } = self; export_to_env_if_not_present(MEILI_DB_PATH, db_path); export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); @@ -439,6 +446,10 @@ impl Opt { MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE, reduce_indexing_memory_usage.to_string(), ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_PROFILE_WITH_PUFFIN, + experimental_profile_with_puffin.to_string(), + ); indexer_options.export_to_env(); } From 62dfd09dc60949e3a2333a2e177a5352950b7fd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 20 Sep 2023 11:13:29 +0200 Subject: [PATCH 03/13] Add more puffin logs to the deletion functions --- milli/src/update/delete_documents.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index e23714530..164ad0c7e 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -108,15 +108,17 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { self.delete_document(docid); Some(docid) } - pub fn execute(self) -> Result { - puffin::profile_function!(); + pub fn execute(self) -> Result { let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } = self.execute_inner()?; Ok(DocumentDeletionResult { deleted_documents, remaining_documents }) } + pub(crate) fn execute_inner(mut self) -> Result { + puffin::profile_function!(); + self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?; // We retrieve the current documents ids that are in the database. @@ -476,6 +478,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { C: for<'a> BytesDecode<'a, DItem = RoaringBitmap> + for<'a> BytesEncode<'a, EItem = RoaringBitmap>, { + puffin::profile_function!(); + while let Some(result) = iter.next() { let (bytes, mut docids) = result?; let previous_len = docids.len(); @@ -498,6 +502,8 @@ fn remove_from_word_prefix_docids( db: &Database, to_remove: &RoaringBitmap, ) -> Result>> { + puffin::profile_function!(); + let mut prefixes_to_delete = fst::SetBuilder::memory(); // We iterate over the word prefix docids database and remove the deleted documents ids @@ -528,6 +534,8 @@ fn remove_from_word_docids( words_to_keep: &mut BTreeSet, words_to_remove: &mut BTreeSet, ) -> Result<()> { + puffin::profile_function!(); + // We create an iterator to be able to get the content and delete the word docids. // It's faster to acquire a cursor to get and delete or put, as we avoid traversing // the LMDB B-Tree two times but only once. @@ -559,6 +567,8 @@ fn remove_docids_from_field_id_docid_facet_value( field_id: FieldId, to_remove: &RoaringBitmap, ) -> heed::Result>> { + puffin::profile_function!(); + let db = match facet_type { FacetType::String => { index.field_id_docid_facet_strings.remap_types::() @@ -594,6 +604,8 @@ fn remove_docids_from_facet_id_docids<'a, C>( where C: heed::BytesDecode<'a> + heed::BytesEncode<'a>, { + puffin::profile_function!(); + let mut iter = db.remap_key_type::().iter_mut(wtxn)?; while let Some(result) = iter.next() { let (bytes, mut docids) = result?; From 58db8d85ec78f2099f6d055465b74168dda84544 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 25 Sep 2023 14:28:54 +0200 Subject: [PATCH 04/13] Add the `exportPuffinReports` option to the runtime features route --- index-scheduler/src/features.rs | 13 +++++++++++++ meilisearch-types/src/features.rs | 1 + meilisearch/src/routes/features.rs | 15 ++++++++++++--- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index 5a663fe67..ddb7f31d7 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -67,6 +67,19 @@ impl RoFeatures { .into()) } } + + pub fn check_puffin(&self) -> Result<()> { + if self.runtime.export_puffin_reports { + Ok(()) + } else { + Err(FeatureNotEnabledError { + disabled_action: "Outputting Puffin reports to disk", + feature: "export puffin reports", + issue_link: "https://github.com/meilisearch/product/discussions/693", + } + .into()) + } + } } impl FeatureData { diff --git a/meilisearch-types/src/features.rs b/meilisearch-types/src/features.rs index f62300485..a8a3646f6 100644 --- a/meilisearch-types/src/features.rs +++ b/meilisearch-types/src/features.rs @@ -5,6 +5,7 @@ use serde::{Deserialize, Serialize}; pub struct RuntimeTogglableFeatures { pub score_details: bool, pub vector_store: bool, + pub export_puffin_reports: bool, } #[derive(Default, Debug, Clone, Copy)] diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index a2822b4d4..6f68b5b3c 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -44,6 +44,8 @@ pub struct RuntimeTogglableFeatures { pub score_details: Option, #[deserr(default)] pub vector_store: Option, + #[deserr(default)] + pub export_puffin_reports: Option, } async fn patch_features( @@ -58,23 +60,30 @@ async fn patch_features( let features = index_scheduler.features()?; let old_features = features.runtime_features(); - let new_features = meilisearch_types::features::RuntimeTogglableFeatures { score_details: new_features.0.score_details.unwrap_or(old_features.score_details), vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store), + export_puffin_reports: new_features + .0 + .export_puffin_reports + .unwrap_or(old_features.export_puffin_reports), }; // explicitly destructure for analytics rather than using the `Serialize` implementation, because // the it renames to camelCase, which we don't want for analytics. // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future. - let meilisearch_types::features::RuntimeTogglableFeatures { score_details, vector_store } = - new_features; + let meilisearch_types::features::RuntimeTogglableFeatures { + score_details, + vector_store, + export_puffin_reports, + } = new_features; analytics.publish( "Experimental features Updated".to_string(), json!({ "score_details": score_details, "vector_store": vector_store, + "export_puffin_reports": export_puffin_reports, }), Some(&req), ); From 0d4acf2daa05c9056e5604be07c36a53c6de8350 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 25 Sep 2023 14:29:17 +0200 Subject: [PATCH 05/13] Fix the metrics product URL --- index-scheduler/src/features.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index ddb7f31d7..e78a41b5b 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -49,7 +49,7 @@ impl RoFeatures { Err(FeatureNotEnabledError { disabled_action: "Getting metrics", feature: "metrics", - issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518", + issue_link: "https://github.com/meilisearch/product/discussions/625", } .into()) } From 90a626bf8056b66b4e1a4532c3f4638a8c52660e Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 25 Sep 2023 14:44:15 +0200 Subject: [PATCH 06/13] Use the runtime feature to enable puffin report exporting --- index-scheduler/src/lib.rs | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 2f6a6ea2b..f0842a1af 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -53,6 +53,7 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder; use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; +use puffin::FrameView; use roaring::RoaringBitmap; use synchronoise::SignalEvent; use time::format_description::well_known::Rfc3339; @@ -1072,6 +1073,8 @@ impl IndexScheduler { self.breakpoint(Breakpoint::Start); } + let puffin_enabled = self.features()?.check_puffin().is_ok(); + puffin::set_scopes_on(puffin_enabled); puffin::GlobalProfiler::lock().new_frame(); self.cleanup_task_queue()?; @@ -1081,15 +1084,19 @@ impl IndexScheduler { match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? { Some(batch) => batch, None => { - // Let's write the previous save to disk but only if + // Let's write the previous frame to disk but only if // the user wanted to profile with puffin. - if let Some(global_frame_view) = &self.puffin_frame { - let frame_view = global_frame_view.lock(); - if !frame_view.is_empty() { - let mut file = - File::create(format!("{}.puffin", OffsetDateTime::now_utc()))?; - frame_view.save_to_writer(&mut file)?; - file.sync_all()?; + if puffin_enabled { + if let Some(global_frame_view) = &self.puffin_frame { + let mut frame_view = global_frame_view.lock(); + if !frame_view.is_empty() { + let now = OffsetDateTime::now_utc(); + let mut file = File::create(format!("{}.puffin", now))?; + frame_view.save_to_writer(&mut file)?; + file.sync_all()?; + // We erase everything on this frame as it is not more useful. + *frame_view = FrameView::default(); + } } } From 513e61e9a392069e43eaa84ccb03d6dbf2a6704e Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 25 Sep 2023 14:48:41 +0200 Subject: [PATCH 07/13] Remove the experimental CLI flag --- index-scheduler/src/lib.rs | 29 ++++++++----------- .../src/analytics/segment_analytics.rs | 1 - meilisearch/src/lib.rs | 1 - meilisearch/src/main.rs | 2 -- meilisearch/src/option.rs | 11 ------- 5 files changed, 12 insertions(+), 32 deletions(-) diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index f0842a1af..c8a7f0c8b 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -258,8 +258,6 @@ pub struct IndexSchedulerOptions { /// The maximum number of tasks stored in the task queue before starting /// to auto schedule task deletions. pub max_number_of_tasks: usize, - /// Weither we need to export indexation puffin reports. - pub profile_with_puffin: bool, /// The experimental features enabled for this instance. pub instance_features: InstanceTogglableFeatures, } @@ -318,8 +316,8 @@ pub struct IndexScheduler { /// the finished tasks automatically. pub(crate) max_number_of_tasks: usize, - /// Wether we must output indexation profiling files to disk. - pub(crate) puffin_frame: Option>, + /// A frame to output the indexation profiling files to disk. + pub(crate) puffin_frame: Arc, /// The path used to create the dumps. pub(crate) dumps_path: PathBuf, @@ -465,9 +463,7 @@ impl IndexScheduler { env, // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things wake_up: Arc::new(SignalEvent::auto(true)), - puffin_frame: options - .profile_with_puffin - .then(|| Arc::new(puffin::GlobalFrameView::default())), + puffin_frame: Arc::new(puffin::GlobalFrameView::default()), autobatching_enabled: options.autobatching_enabled, max_number_of_tasks: options.max_number_of_tasks, dumps_path: options.dumps_path, @@ -1087,16 +1083,15 @@ impl IndexScheduler { // Let's write the previous frame to disk but only if // the user wanted to profile with puffin. if puffin_enabled { - if let Some(global_frame_view) = &self.puffin_frame { - let mut frame_view = global_frame_view.lock(); - if !frame_view.is_empty() { - let now = OffsetDateTime::now_utc(); - let mut file = File::create(format!("{}.puffin", now))?; - frame_view.save_to_writer(&mut file)?; - file.sync_all()?; - // We erase everything on this frame as it is not more useful. - *frame_view = FrameView::default(); - } + let mut frame_view = self.puffin_frame.lock(); + if !frame_view.is_empty() { + let now = OffsetDateTime::now_utc(); + let mut file = File::create(format!("{}.puffin", now))?; + frame_view.save_to_writer(&mut file)?; + file.sync_all()?; + // We erase this frame view as it is no more useful. We want to + // measure the new frames now that we exported the previous ones. + *frame_view = FrameView::default(); } } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index d7869525e..2f0014ab7 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -285,7 +285,6 @@ impl From for Infos { db_path, experimental_enable_metrics, experimental_reduce_indexing_memory_usage, - experimental_profile_with_puffin: _, http_addr, master_key: _, env, diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index cf00789b3..ef821f49f 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -237,7 +237,6 @@ fn open_or_create_database_unchecked( indexer_config: (&opt.indexer_options).try_into()?, autobatching_enabled: true, max_number_of_tasks: 1_000_000, - profile_with_puffin: opt.experimental_profile_with_puffin, index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize, index_count: DEFAULT_INDEX_COUNT, instance_features, diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index b53335ed2..246d62c3b 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -30,8 +30,6 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> { let (opt, config_read_from) = Opt::try_build()?; - puffin::set_scopes_on(opt.experimental_profile_with_puffin); - anyhow::ensure!( !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage), "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows" diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 569497b9f..b8489c3e3 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -51,7 +51,6 @@ const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE"; -const MEILI_EXPERIMENTAL_PROFILE_WITH_PUFFIN: &str = "MEILI_EXPERIMENTAL_PROFILE_WITH_PUFFIN"; const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml"; const DEFAULT_DB_PATH: &str = "./data.ms"; @@ -302,11 +301,6 @@ pub struct Opt { #[serde(default)] pub experimental_reduce_indexing_memory_usage: bool, - /// Experimental flag to export puffin reports, see: - #[clap(long, env = MEILI_EXPERIMENTAL_PROFILE_WITH_PUFFIN)] - #[serde(default)] - pub experimental_profile_with_puffin: bool, - #[serde(flatten)] #[clap(flatten)] pub indexer_options: IndexerOpts, @@ -400,7 +394,6 @@ impl Opt { no_analytics, experimental_enable_metrics: enable_metrics_route, experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage, - experimental_profile_with_puffin, } = self; export_to_env_if_not_present(MEILI_DB_PATH, db_path); export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); @@ -446,10 +439,6 @@ impl Opt { MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE, reduce_indexing_memory_usage.to_string(), ); - export_to_env_if_not_present( - MEILI_EXPERIMENTAL_PROFILE_WITH_PUFFIN, - experimental_profile_with_puffin.to_string(), - ); indexer_options.export_to_env(); } From c45c6cf54ceb14798fd76a197c27004c7ff136d4 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 25 Sep 2023 15:02:52 +0200 Subject: [PATCH 08/13] Update the PROFILING.md file --- PROFILING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/PROFILING.md b/PROFILING.md index 4ae21f2f5..daf46bbc2 100644 --- a/PROFILING.md +++ b/PROFILING.md @@ -1,14 +1,14 @@ # Profiling Meilisearch -Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options. +Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options [in Puffin Viewer](https://github.com/embarkstudios/puffin#ui). ![An example profiling with Puffin viewer](assets/profiling-example.png) ## Profiling the Indexing Process -When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method. +When you enable [the `exportPuffinReports` experimental feature](https://www.meilisearch.com/docs/learn/experimental/overview) of Meilisearch, Puffin reports with the `.puffin` extension will be automatically exported to disk. When this option is enabled, the engine will automatically create a "frame" whenever it executes the `IndexScheduler::tick` method. -Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server. +[Puffin Viewer](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) is used to analyze the reports. Those reports show areas where Meilisearch spent time during indexing. Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help. From f2a9e1ebbb5290b38bca3f4aaf04c40eef53406c Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 25 Sep 2023 15:38:08 +0200 Subject: [PATCH 09/13] Improve the debugging experience in the puffin reports --- index-scheduler/src/batch.rs | 51 +++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index ccdcbcbb6..e544c8769 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -17,6 +17,8 @@ tasks individally, but should be much faster since we are only performing one indexing operation. */ +use core::fmt; +use std::borrow::Cow; use std::collections::{BTreeSet, HashSet}; use std::ffi::OsStr; use std::fs::{self, File}; @@ -199,6 +201,29 @@ impl Batch { } } +impl fmt::Display for Batch { + /// A text used when we debug the profiling reports. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let index_uid = self.index_uid(); + let tasks = self.ids(); + let task = match self { + Batch::TaskCancelation { .. } => Cow::Borrowed("TaskCancelation"), + Batch::TaskDeletion(_) => Cow::Borrowed("TaskDeletion"), + Batch::SnapshotCreation(_) => Cow::Borrowed("SnapshotCreation"), + Batch::Dump(_) => Cow::Borrowed("Dump"), + Batch::IndexOperation { op, .. } => Cow::Owned(op.to_string()), + Batch::IndexCreation { .. } => Cow::Borrowed("IndexCreation"), + Batch::IndexUpdate { .. } => Cow::Borrowed("IndexUpdate"), + Batch::IndexDeletion { .. } => Cow::Borrowed("IndexDeletion"), + Batch::IndexSwap { .. } => Cow::Borrowed("IndexSwap"), + }; + match index_uid { + Some(name) => f.write_fmt(format_args!("{task} on {name:?} from tasks: {tasks:?}")), + None => f.write_fmt(format_args!("{task} from tasks: {tasks:?}")), + } + } +} + impl IndexOperation { pub fn index_uid(&self) -> &str { match self { @@ -213,6 +238,30 @@ impl IndexOperation { } } +impl fmt::Display for IndexOperation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + IndexOperation::DocumentOperation { .. } => { + f.write_str("IndexOperation::DocumentOperation") + } + IndexOperation::DocumentDeletion { .. } => { + f.write_str("IndexOperation::DocumentDeletion") + } + IndexOperation::IndexDocumentDeletionByFilter { .. } => { + f.write_str("IndexOperation::IndexDocumentDeletionByFilter") + } + IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"), + IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"), + IndexOperation::DocumentClearAndSetting { .. } => { + f.write_str("IndexOperation::DocumentClearAndSetting") + } + IndexOperation::SettingsAndDocumentOperation { .. } => { + f.write_str("IndexOperation::SettingsAndDocumentOperation") + } + } + } +} + impl IndexScheduler { /// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`]. /// @@ -581,7 +630,7 @@ impl IndexScheduler { self.breakpoint(crate::Breakpoint::InsideProcessBatch); } - puffin::profile_function!(format!("{:?}", batch)); + puffin::profile_function!(batch.to_string()); match batch { Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => { From bf8fac6676e44623cba0ec24b499d2ff8d520e4e Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 25 Sep 2023 15:39:47 +0200 Subject: [PATCH 10/13] Fix the tests --- index-scheduler/src/insta_snapshot.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs index afcfdb270..f820ce99d 100644 --- a/index-scheduler/src/insta_snapshot.rs +++ b/index-scheduler/src/insta_snapshot.rs @@ -30,6 +30,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { index_mapper, features: _, max_number_of_tasks: _, + puffin_frame: _, wake_up: _, dumps_path: _, snapshots_path: _, From 1b8871a585711e72d642cfebcfdf32eb5632cb7b Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 25 Sep 2023 16:24:50 +0200 Subject: [PATCH 11/13] Make cargo insta happy --- meilisearch/tests/features/mod.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/meilisearch/tests/features/mod.rs b/meilisearch/tests/features/mod.rs index 348deb5e2..bc3d4e798 100644 --- a/meilisearch/tests/features/mod.rs +++ b/meilisearch/tests/features/mod.rs @@ -16,7 +16,8 @@ async fn experimental_features() { meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { "scoreDetails": false, - "vectorStore": false + "vectorStore": false, + "exportPuffinReports": false } "###); @@ -26,7 +27,8 @@ async fn experimental_features() { meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { "scoreDetails": false, - "vectorStore": true + "vectorStore": true, + "exportPuffinReports": false } "###); @@ -36,7 +38,8 @@ async fn experimental_features() { meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { "scoreDetails": false, - "vectorStore": true + "vectorStore": true, + "exportPuffinReports": false } "###); @@ -47,7 +50,8 @@ async fn experimental_features() { meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { "scoreDetails": false, - "vectorStore": true + "vectorStore": true, + "exportPuffinReports": false } "###); @@ -58,7 +62,8 @@ async fn experimental_features() { meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { "scoreDetails": false, - "vectorStore": true + "vectorStore": true, + "exportPuffinReports": false } "###); } @@ -73,7 +78,7 @@ async fn errors() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`", + "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `exportPuffinReports`", "code": "bad_request", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#bad_request" From 055ca3935bf20959c7f2c326dfc718eaf938fec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 27 Sep 2023 16:41:22 +0200 Subject: [PATCH 12/13] Update index-scheduler/src/batch.rs Co-authored-by: Tamo --- index-scheduler/src/batch.rs | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index e544c8769..48eae0063 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -17,10 +17,9 @@ tasks individally, but should be much faster since we are only performing one indexing operation. */ -use core::fmt; -use std::borrow::Cow; use std::collections::{BTreeSet, HashSet}; use std::ffi::OsStr; +use std::fmt; use std::fs::{self, File}; use std::io::BufWriter; @@ -206,20 +205,20 @@ impl fmt::Display for Batch { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let index_uid = self.index_uid(); let tasks = self.ids(); - let task = match self { - Batch::TaskCancelation { .. } => Cow::Borrowed("TaskCancelation"), - Batch::TaskDeletion(_) => Cow::Borrowed("TaskDeletion"), - Batch::SnapshotCreation(_) => Cow::Borrowed("SnapshotCreation"), - Batch::Dump(_) => Cow::Borrowed("Dump"), - Batch::IndexOperation { op, .. } => Cow::Owned(op.to_string()), - Batch::IndexCreation { .. } => Cow::Borrowed("IndexCreation"), - Batch::IndexUpdate { .. } => Cow::Borrowed("IndexUpdate"), - Batch::IndexDeletion { .. } => Cow::Borrowed("IndexDeletion"), - Batch::IndexSwap { .. } => Cow::Borrowed("IndexSwap"), + match self { + Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?, + Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?, + Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?, + Batch::Dump(_) => f.write_str("Dump")?, + Batch::IndexOperation { op, .. } => write!(f, "{op}")?, + Batch::IndexCreation { .. } => f.write_str("IndexCreation")?, + Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?, + Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?, + Batch::IndexSwap { .. } => f.write_str("IndexSwap")?, }; match index_uid { - Some(name) => f.write_fmt(format_args!("{task} on {name:?} from tasks: {tasks:?}")), - None => f.write_fmt(format_args!("{task} from tasks: {tasks:?}")), + Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")), + None => f.write_fmt(format_args!(" from tasks: {tasks:?}")), } } } From 3655d4bdca2aaa8d08d214e8d01fc78d408c4672 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 28 Sep 2023 10:10:12 +0200 Subject: [PATCH 13/13] Move the puffin file export logic into the run function --- index-scheduler/src/lib.rs | 60 +++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index c8a7f0c8b..0b3a5d58a 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -579,17 +579,52 @@ impl IndexScheduler { run.wake_up.wait(); loop { + let puffin_enabled = match run.features() { + Ok(features) => features.check_puffin().is_ok(), + Err(e) => { + log::error!("{e}"); + continue; + } + }; + puffin::set_scopes_on(puffin_enabled); + puffin::GlobalProfiler::lock().new_frame(); + match run.tick() { Ok(TickOutcome::TickAgain(_)) => (), Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(), Err(e) => { - log::error!("{}", e); + log::error!("{e}"); // Wait one second when an irrecoverable error occurs. if !e.is_recoverable() { std::thread::sleep(Duration::from_secs(1)); } } } + + // Let's write the previous frame to disk but only if + // the user wanted to profile with puffin. + if puffin_enabled { + let mut frame_view = run.puffin_frame.lock(); + if !frame_view.is_empty() { + let now = OffsetDateTime::now_utc(); + let mut file = match File::create(format!("{}.puffin", now)) { + Ok(file) => file, + Err(e) => { + log::error!("{e}"); + continue; + } + }; + if let Err(e) = frame_view.save_to_writer(&mut file) { + log::error!("{e}"); + } + if let Err(e) = file.sync_all() { + log::error!("{e}"); + } + // We erase this frame view as it is no more useful. We want to + // measure the new frames now that we exported the previous ones. + *frame_view = FrameView::default(); + } + } } }) .unwrap(); @@ -1069,34 +1104,13 @@ impl IndexScheduler { self.breakpoint(Breakpoint::Start); } - let puffin_enabled = self.features()?.check_puffin().is_ok(); - puffin::set_scopes_on(puffin_enabled); - puffin::GlobalProfiler::lock().new_frame(); - self.cleanup_task_queue()?; let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; let batch = match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? { Some(batch) => batch, - None => { - // Let's write the previous frame to disk but only if - // the user wanted to profile with puffin. - if puffin_enabled { - let mut frame_view = self.puffin_frame.lock(); - if !frame_view.is_empty() { - let now = OffsetDateTime::now_utc(); - let mut file = File::create(format!("{}.puffin", now))?; - frame_view.save_to_writer(&mut file)?; - file.sync_all()?; - // We erase this frame view as it is no more useful. We want to - // measure the new frames now that we exported the previous ones. - *frame_view = FrameView::default(); - } - } - - return Ok(TickOutcome::WaitForSignal); - } + None => return Ok(TickOutcome::WaitForSignal), }; let index_uid = batch.index_uid().map(ToOwned::to_owned); drop(rtxn);