diff --git a/Cargo.lock b/Cargo.lock index 3f9171edc..32ad13772 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -535,6 +535,26 @@ dependencies = [ "serde", ] +[[package]] +name = "bindgen" +version = "0.68.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078" +dependencies = [ + "bitflags 2.4.1", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "peeking_take_while", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.48", +] + [[package]] name = "bit-set" version = "0.5.3" @@ -831,6 +851,15 @@ dependencies = [ "smallvec", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -915,6 +944,17 @@ dependencies = [ "inout", ] +[[package]] +name = "clang-sys" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "4.4.17" @@ -961,6 +1001,18 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" +[[package]] +name = "color-spantrace" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd6be1b2a7e382e2b98b43b2adcca6bb0e465af0bdd38123873ae61eb17a72c2" +dependencies = [ + "once_cell", + "owo-colors", + "tracing-core", + "tracing-error", +] + [[package]] name = "colorchoice" version = "1.0.0" @@ -1286,6 +1338,15 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "debugid" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" +dependencies = [ + "uuid", +] + [[package]] name = "deduplicating_array" version = "0.1.5" @@ -1482,7 +1543,6 @@ dependencies = [ "big_s", "flate2", "http 0.2.11", - "log", "maplit", "meili-snap", "meilisearch-auth", @@ -1496,6 +1556,7 @@ dependencies = [ "tempfile", "thiserror", "time", + "tracing", "uuid", ] @@ -1911,6 +1972,19 @@ dependencies = [ "byteorder", ] +[[package]] +name = "fxprof-processed-profile" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27d12c0aed7f1e24276a241aadc4cb8ea9f83000f34bc062b7cc2d51e3b0fabd" +dependencies = [ + "bitflags 2.4.1", + "debugid", + "fxhash", + "serde", + "serde_json", +] + [[package]] name = "gemm" version = "0.17.0" @@ -2869,11 +2943,9 @@ dependencies = [ "file-store", "flate2", "insta", - "log", "meili-snap", "meilisearch-auth", "meilisearch-types", - "nelson", "page_size 0.5.0", "puffin", "roaring", @@ -2883,6 +2955,7 @@ dependencies = [ "tempfile", "thiserror", "time", + "tracing", "ureq", "uuid", ] @@ -3085,6 +3158,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "levenshtein_automata" version = "0.2.1" @@ -3112,6 +3191,16 @@ dependencies = [ 
"pkg-config", ] +[[package]] +name = "libloading" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "libm" version = "0.2.7" @@ -3128,6 +3217,17 @@ dependencies = [ "libc", ] +[[package]] +name = "libproc" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "229004ebba9d1d5caf41623f1523b6d52abb47d9f6ab87f7e6fc992e3b854aef" +dependencies = [ + "bindgen", + "errno", + "libc", +] + [[package]] name = "libz-sys" version = "1.1.12" @@ -3586,7 +3686,6 @@ dependencies = [ "deserr", "dump", "either", - "env_logger", "file-store", "flate2", "fst", @@ -3601,7 +3700,6 @@ dependencies = [ "itertools 0.11.0", "jsonwebtoken", "lazy_static", - "log", "manifest-dir-macros", "maplit", "meili-snap", @@ -3644,6 +3742,10 @@ dependencies = [ "tokio", "tokio-stream", "toml", + "tracing", + "tracing-actix-web", + "tracing-subscriber", + "tracing-trace", "url", "urlencoding", "uuid", @@ -3776,7 +3878,6 @@ dependencies = [ "json-depth-checker", "levenshtein_automata", "liquid", - "log", "logging_timer", "maplit", "md5", @@ -3805,6 +3906,7 @@ dependencies = [ "time", "tokenizers", "tokio", + "tracing", "uuid", ] @@ -3882,9 +3984,10 @@ dependencies = [ ] [[package]] -name = "nelson" -version = "0.1.0" -source = "git+https://github.com/meilisearch/nelson.git?rev=675f13885548fb415ead8fbb447e9e6d9314000a#675f13885548fb415ead8fbb447e9e6d9314000a" +name = "mutually_exclusive_features" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e" [[package]] name = "nom" @@ -3916,6 +4019,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num-bigint" version = "0.4.3" @@ -4037,6 +4150,18 @@ dependencies = [ "num-traits", ] +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "owo-colors" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" + [[package]] name = "page_size" version = "0.5.0" @@ -4124,6 +4249,12 @@ dependencies = [ "sha2", ] +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + [[package]] name = "pem" version = "1.1.1" @@ -4243,6 +4374,26 @@ dependencies = [ "siphasher 0.3.11", ] +[[package]] +name = "pin-project" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0302c4a0442c456bd56f841aee5c3bfd17967563f6fadc9ceb9f9c23cf3807e0" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + 
[[package]] name = "pin-project-lite" version = "0.2.13" @@ -4994,6 +5145,21 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.1" @@ -5326,6 +5492,16 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "thread_local" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "tiktoken-rs" version = "0.5.8" @@ -5554,17 +5730,29 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.37" +version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "cfg-if", "log", "pin-project-lite", "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-actix-web" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fe0d5feac3f4ca21ba33496bcb1ccab58cca6412b1405ae80f0581541e0ca78" +dependencies = [ + "actix-web", + "mutually_exclusive_features", + "pin-project", + "tracing", + "uuid", +] + [[package]] name = "tracing-attributes" version = "0.1.27" @@ -5578,11 +5766,63 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-error" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e" +dependencies = [ + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "tracing-trace" +version = "0.1.0" +dependencies = [ + "byte-unit", + "color-spantrace", + "fxprof-processed-profile", + "libproc", + "serde", + "serde_json", + "tokio", + "tracing", + "tracing-error", + "tracing-subscriber", ] [[package]] @@ -5758,6 +5998,12 @@ dependencies = [ "serde", ] +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index a0c6c3ac9..7f6a8088e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,12 +16,16 @@ members = [ "json-depth-checker", "benchmarks", "fuzzers", + "tracing-trace", "xtask", ] [workspace.package] version = "1.6.1" -authors = ["Quentin de Quelen ", "Clément Renault "] +authors = [ + "Quentin de Quelen ", + "Clément Renault ", +] description = "Meilisearch HTTP server" homepage = "https://meilisearch.com" readme = "README.md" diff --git a/dump/Cargo.toml b/dump/Cargo.toml index 941cec72d..92830c782 100644 --- a/dump/Cargo.toml +++ b/dump/Cargo.toml @@ -14,7 +14,6 @@ license.workspace = true anyhow = "1.0.79" flate2 = "1.0.28" http = "0.2.11" -log = "0.4.20" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } once_cell = "1.19.0" @@ -26,6 +25,7 @@ tar = "0.4.40" tempfile = "3.9.0" thiserror = "1.0.56" time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] } +tracing = "0.1.40" uuid = { version = "1.6.1", features = ["serde", "v4"] } [dev-dependencies] diff --git a/dump/src/reader/compat/v1_to_v2.rs b/dump/src/reader/compat/v1_to_v2.rs index 789e8e0b1..0d050497b 100644 --- a/dump/src/reader/compat/v1_to_v2.rs +++ b/dump/src/reader/compat/v1_to_v2.rs @@ -120,7 +120,7 @@ impl From for v2::Settings { criterion.as_ref().map(ToString::to_string) } Err(()) => { - log::warn!( + tracing::warn!( "Could not import the following ranking rule: `{}`.", ranking_rule ); @@ -152,11 +152,11 @@ impl From for Option { use v2::updates::UpdateStatus as UpdateStatusV2; Some(match source { UpdateStatusV1::Enqueued { content } => { - log::warn!( + tracing::warn!( "Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)", content.update_id ); - log::warn!("Task will be skipped in the queue of imported tasks."); + tracing::warn!("Task will be skipped in the queue of imported tasks."); return None; } @@ -229,7 +229,7 @@ impl From for Option { Some(match source { v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments, v1::update::UpdateType::Customs => { - log::warn!("Ignoring task with type 'Customs' that is no longer supported"); + tracing::warn!("Ignoring task with type 'Customs' that is no longer supported"); return None; } v1::update::UpdateType::DocumentsAddition { .. 
} => { @@ -296,7 +296,7 @@ impl From for Option { v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity), v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute), v1::settings::RankingRule::WordsPosition => { - log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes"); + tracing::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes"); None } v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness), diff --git a/dump/src/reader/compat/v2_to_v3.rs b/dump/src/reader/compat/v2_to_v3.rs index 1d5f4e153..1d4238290 100644 --- a/dump/src/reader/compat/v2_to_v3.rs +++ b/dump/src/reader/compat/v2_to_v3.rs @@ -146,8 +146,8 @@ impl From for v3::updates::UpdateStatus { started_processing_at: processing.started_processing_at, }), Err(e) => { - log::warn!("Error with task {}: {}", processing.from.update_id, e); - log::warn!("Task will be marked as `Failed`."); + tracing::warn!("Error with task {}: {}", processing.from.update_id, e); + tracing::warn!("Task will be marked as `Failed`."); v3::updates::UpdateStatus::Failed(v3::updates::Failed { from: v3::updates::Processing { from: v3::updates::Enqueued { @@ -172,8 +172,8 @@ impl From for v3::updates::UpdateStatus { enqueued_at: enqueued.enqueued_at, }), Err(e) => { - log::warn!("Error with task {}: {}", enqueued.update_id, e); - log::warn!("Task will be marked as `Failed`."); + tracing::warn!("Error with task {}: {}", enqueued.update_id, e); + tracing::warn!("Task will be marked as `Failed`."); v3::updates::UpdateStatus::Failed(v3::updates::Failed { from: v3::updates::Processing { from: v3::updates::Enqueued { @@ -353,7 +353,7 @@ impl From for v3::Code { "malformed_payload" => v3::Code::MalformedPayload, "missing_payload" => v3::Code::MissingPayload, other => { - log::warn!("Unknown error code {}", other); + tracing::warn!("Unknown error code {}", other); v3::Code::UnretrievableErrorCode } } diff --git a/dump/src/reader/compat/v3_to_v4.rs b/dump/src/reader/compat/v3_to_v4.rs index b4153eb31..244948200 100644 --- a/dump/src/reader/compat/v3_to_v4.rs +++ b/dump/src/reader/compat/v3_to_v4.rs @@ -76,20 +76,20 @@ impl CompatV3ToV4 { let index_uid = match index_uid { Some(uid) => uid, None => { - log::warn!( + tracing::warn!( "Error while importing the update {}.", task.update.id() ); - log::warn!( + tracing::warn!( "The index associated to the uuid `{}` could not be retrieved.", task.uuid.to_string() ); if task.update.is_finished() { // we're fucking with his history but not his data, that's ok-ish. 
- log::warn!("The index-uuid will be set as `unknown`."); + tracing::warn!("The index-uuid will be set as `unknown`."); String::from("unknown") } else { - log::warn!("The task will be ignored."); + tracing::warn!("The task will be ignored."); return None; } } diff --git a/dump/src/reader/compat/v4_to_v5.rs b/dump/src/reader/compat/v4_to_v5.rs index 850e2cccd..aa8441779 100644 --- a/dump/src/reader/compat/v4_to_v5.rs +++ b/dump/src/reader/compat/v4_to_v5.rs @@ -305,7 +305,7 @@ impl From for v5::ResponseError { "invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt, "invalid_api_key_description" => v5::Code::InvalidApiKeyDescription, other => { - log::warn!("Unknown error code {}", other); + tracing::warn!("Unknown error code {}", other); v5::Code::UnretrievableErrorCode } }; diff --git a/dump/src/reader/compat/v5_to_v6.rs b/dump/src/reader/compat/v5_to_v6.rs index 9351ae70d..e00d3a599 100644 --- a/dump/src/reader/compat/v5_to_v6.rs +++ b/dump/src/reader/compat/v5_to_v6.rs @@ -304,7 +304,7 @@ impl From for v6::ResponseError { "immutable_field" => v6::Code::BadRequest, "api_key_already_exists" => v6::Code::ApiKeyAlreadyExists, other => { - log::warn!("Unknown error code {}", other); + tracing::warn!("Unknown error code {}", other); v6::Code::UnretrievableErrorCode } }; @@ -329,7 +329,7 @@ impl From> for v6::Settings { new_ranking_rules.push(new_rule); } Err(_) => { - log::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.") + tracing::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.") } } } diff --git a/dump/src/reader/v6/mod.rs b/dump/src/reader/v6/mod.rs index 4e980e03e..50b9751a2 100644 --- a/dump/src/reader/v6/mod.rs +++ b/dump/src/reader/v6/mod.rs @@ -2,10 +2,10 @@ use std::fs::{self, File}; use std::io::{BufRead, BufReader, ErrorKind}; use std::path::Path; -use log::debug; pub use meilisearch_types::milli; use tempfile::TempDir; use time::OffsetDateTime; +use tracing::debug; use uuid::Uuid; use super::Document; diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 4300bc12c..890312854 100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -19,7 +19,6 @@ dump = { path = "../dump" } enum-iterator = "1.5.0" file-store = { path = "../file-store" } flate2 = "1.0.28" -log = "0.4.20" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } page_size = "0.5.0" @@ -30,7 +29,13 @@ serde_json = { version = "1.0.111", features = ["preserve_order"] } synchronoise = "1.0.1" tempfile = "3.9.0" thiserror = "1.0.56" -time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] } +time = { version = "0.3.31", features = [ + "serde-well-known", + "formatting", + "parsing", + "macros", +] } +tracing = "0.1.40" ureq = "2.9.1" uuid = { version = "1.6.1", features = ["serde", "v4"] } @@ -39,4 +44,3 @@ big_s = "1.0.2" crossbeam = "0.8.4" insta = { version = "1.34.0", features = ["json", "redactions"] } meili-snap = { path = "../meili-snap" } -nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"} diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 01b0ddc1e..8e2eb26a0 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -24,7 +24,6 @@ use std::fs::{self, File}; use std::io::BufWriter; use dump::IndexMetadata; -use log::{debug, error, info, trace}; use meilisearch_types::error::Code; use 
meilisearch_types::heed::{RoTxn, RwTxn}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; @@ -514,6 +513,7 @@ impl IndexScheduler { /// 3. We get the *next* snapshot to process. /// 4. We get the *next* dump to process. /// 5. We get the *next* tasks to process for a specific index. + #[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")] pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result> { #[cfg(test)] self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?; @@ -619,6 +619,7 @@ impl IndexScheduler { /// The list of tasks that were processed. The metadata of each task in the returned /// list is updated accordingly, with the exception of the its date fields /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at). + #[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))] pub(crate) fn process_batch(&self, batch: Batch) -> Result> { #[cfg(test)] { @@ -668,9 +669,10 @@ impl IndexScheduler { Ok(()) => { for content_uuid in canceled_tasks_content_uuids { if let Err(error) = self.delete_update_file(content_uuid) { - error!( - "We failed deleting the content file indentified as {}: {}", - content_uuid, error + tracing::error!( + file_content_uuid = %content_uuid, + %error, + "Failed deleting content file" ) } } @@ -969,7 +971,10 @@ impl IndexScheduler { match res { Ok(_) => (), - Err(e) => error!("Could not write the stats of the index {}", e), + Err(e) => tracing::error!( + error = &e as &dyn std::error::Error, + "Could not write the stats of the index" + ), } Ok(tasks) @@ -997,7 +1002,7 @@ impl IndexScheduler { builder.set_primary_key(primary_key); let must_stop_processing = self.must_stop_processing.clone(); builder.execute( - |indexing_step| debug!("update: {:?}", indexing_step), + |indexing_step| tracing::debug!(update = ?indexing_step), || must_stop_processing.get(), )?; index_wtxn.commit()?; @@ -1024,7 +1029,10 @@ impl IndexScheduler { match res { Ok(_) => (), - Err(e) => error!("Could not write the stats of the index {}", e), + Err(e) => tracing::error!( + error = &e as &dyn std::error::Error, + "Could not write the stats of the index" + ), } Ok(vec![task]) @@ -1143,6 +1151,11 @@ impl IndexScheduler { /// /// ## Return /// The list of processed tasks. + #[tracing::instrument( + level = "trace", + skip(self, index_wtxn, index), + target = "indexing::scheduler" + )] fn apply_index_operation<'i>( &self, index_wtxn: &mut RwTxn<'i>, @@ -1203,7 +1216,7 @@ impl IndexScheduler { milli::update::Settings::new(index_wtxn, index, indexer_config); builder.set_primary_key(primary_key); builder.execute( - |indexing_step| debug!("update: {:?}", indexing_step), + |indexing_step| tracing::debug!(update = ?indexing_step), || must_stop_processing.clone().get(), )?; primary_key_has_been_set = true; @@ -1222,7 +1235,7 @@ impl IndexScheduler { index, indexer_config, config, - |indexing_step| trace!("update: {:?}", indexing_step), + |indexing_step| tracing::trace!(?indexing_step, "Update"), || must_stop_processing.get(), )?; @@ -1294,7 +1307,7 @@ impl IndexScheduler { if !tasks.iter().all(|res| res.error.is_some()) { let addition = builder.execute()?; - info!("document addition done: {:?}", addition); + tracing::info!(indexing_result = ?addition, "document indexing done"); } else if primary_key_has_been_set { // Everything failed but we've set a primary key. // We need to remove it. 
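// --- Illustrative aside, not part of the patch: a minimal sketch of the structured-logging
// pattern the batch.rs hunks above adopt when moving from `log` macros to `tracing`.
// All names below (`process_batch_sketch`, `batch_id`, `payload`) are hypothetical; only the
// `tracing` / `tracing-subscriber` APIs themselves are real.
use tracing::instrument;

// Spans are declared with an attribute instead of manual log lines; `target` groups the
// span under a logical module name and `fields` records structured key/value data.
#[instrument(level = "trace", target = "indexing::scheduler", skip(payload), fields(batch = %batch_id))]
fn process_batch_sketch(batch_id: u32, payload: &[u8]) {
    // Values are attached as structured fields (`key = %value` or `key = ?value`)
    // rather than interpolated into the message string.
    tracing::debug!(payload_len = payload.len(), "processing batch");
}

fn main() {
    // A subscriber must be installed for the span and events above to be emitted anywhere.
    tracing_subscriber::fmt().with_max_level(tracing::Level::TRACE).init();
    process_batch_sketch(1, b"documents");
}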
@@ -1302,7 +1315,7 @@ impl IndexScheduler { milli::update::Settings::new(index_wtxn, index, indexer_config); builder.reset_primary_key(); builder.execute( - |indexing_step| trace!("update: {:?}", indexing_step), + |indexing_step| tracing::trace!(update = ?indexing_step), || must_stop_processing.clone().get(), )?; } @@ -1372,7 +1385,7 @@ impl IndexScheduler { let must_stop_processing = self.must_stop_processing.clone(); builder.execute( - |indexing_step| debug!("update: {:?}", indexing_step), + |indexing_step| tracing::debug!(update = ?indexing_step), || must_stop_processing.get(), )?; @@ -1584,7 +1597,7 @@ fn delete_document_by_filter<'a>( index, indexer_config, config, - |indexing_step| debug!("update: {:?}", indexing_step), + |indexing_step| tracing::debug!(update = ?indexing_step), || must_stop_processing.get(), )?; diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index c18ab98db..4fd5bd0e7 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -43,6 +43,19 @@ impl RoFeatures { } } + pub fn check_logs_route(&self) -> Result<()> { + if self.runtime.logs_route { + Ok(()) + } else { + Err(FeatureNotEnabledError { + disabled_action: "getting logs through the `/logs/stream` route", + feature: "logs route", + issue_link: "https://github.com/orgs/meilisearch/discussions/721", + } + .into()) + } + } + pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> { if self.runtime.vector_store { Ok(()) @@ -81,6 +94,7 @@ impl FeatureData { runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default(); let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures { metrics: instance_features.metrics || persisted_features.metrics, + logs_route: instance_features.logs_route || persisted_features.logs_route, ..persisted_features })); diff --git a/index-scheduler/src/index_mapper/mod.rs b/index-scheduler/src/index_mapper/mod.rs index 58ec2bf11..14908120c 100644 --- a/index-scheduler/src/index_mapper/mod.rs +++ b/index-scheduler/src/index_mapper/mod.rs @@ -3,13 +3,13 @@ use std::sync::{Arc, RwLock}; use std::time::Duration; use std::{fs, thread}; -use log::error; use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::{FieldDistribution, Index}; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; +use tracing::error; use uuid::Uuid; use self::index_map::IndexMap; diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 946a2a33e..7514a2a68 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -535,17 +535,17 @@ impl IndexScheduler { let budget = if Self::is_good_heed(tasks_path, DEFAULT_BUDGET) { DEFAULT_BUDGET } else { - log::debug!("determining budget with dichotomic search"); + tracing::debug!("determining budget with dichotomic search"); utils::dichotomic_search(DEFAULT_BUDGET / 2, |map_size| { Self::is_good_heed(tasks_path, map_size) }) }; - log::debug!("memmap budget: {budget}B"); + tracing::debug!("memmap budget: {budget}B"); let mut budget = budget / 2; if task_db_size > (budget / 2) { task_db_size = clamp_to_page_size(budget * 2 / 5); - log::debug!( + tracing::debug!( "Decreasing max size of task DB to {task_db_size}B due to constrained memory space" ); } @@ -555,13 +555,13 @@ impl IndexScheduler { let budget = budget; let task_db_size = task_db_size; - log::debug!("index budget: {budget}B"); + tracing::debug!("index budget: 
{budget}B"); let mut index_count = budget / base_map_size; if index_count < 2 { // take a bit less than half than the budget to make sure we can always afford to open an index let map_size = (budget * 2) / 5; // single index of max budget - log::debug!("1 index of {map_size}B can be opened simultaneously."); + tracing::debug!("1 index of {map_size}B can be opened simultaneously."); return IndexBudget { map_size, index_count: 1, task_db_size }; } // give us some space for an additional index when the cache is already full @@ -570,7 +570,7 @@ impl IndexScheduler { if index_count > max_index_count { index_count = max_index_count; } - log::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously."); + tracing::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously."); IndexBudget { map_size: base_map_size, index_count, task_db_size } } @@ -617,7 +617,7 @@ impl IndexScheduler { Ok(TickOutcome::TickAgain(_)) => (), Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(), Err(e) => { - log::error!("{e}"); + tracing::error!("{e}"); // Wait one second when an irrecoverable error occurs. if !e.is_recoverable() { std::thread::sleep(Duration::from_secs(1)); @@ -634,15 +634,15 @@ impl IndexScheduler { let mut file = match File::create(format!("{}.puffin", now)) { Ok(file) => file, Err(e) => { - log::error!("{e}"); + tracing::error!("{e}"); continue; } }; if let Err(e) = frame_view.save_to_writer(&mut file) { - log::error!("{e}"); + tracing::error!("{e}"); } if let Err(e) = file.sync_all() { - log::error!("{e}"); + tracing::error!("{e}"); } // We erase this frame view as it is no more useful. We want to // measure the new frames now that we exported the previous ones. @@ -1190,10 +1190,10 @@ impl IndexScheduler { self.update_task(&mut wtxn, &task) .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; if let Err(e) = self.delete_persisted_task_data(&task) { - log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); + tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); } } - log::info!("A batch of tasks was successfully completed."); + tracing::info!("A batch of tasks was successfully completed."); } // If we have an abortion error we must stop the tick here and re-schedule tasks. Err(Error::Milli(milli::Error::InternalError( @@ -1247,7 +1247,7 @@ impl IndexScheduler { self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?; if let Err(e) = self.delete_persisted_task_data(&task) { - log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); + tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); } self.update_task(&mut wtxn, &task) .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; @@ -1341,7 +1341,7 @@ impl IndexScheduler { }; if let Err(e) = request.send(reader) { - log::error!("While sending data to the webhook: {e}"); + tracing::error!("While sending data to the webhook: {e}"); } } @@ -1367,12 +1367,12 @@ impl IndexScheduler { // /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete // the deletion tasks we enqueued ourselves. 
if to_delete.len() < 2 { - log::warn!("The task queue is almost full, but no task can be deleted yet."); + tracing::warn!("The task queue is almost full, but no task can be deleted yet."); // the only thing we can do is hope that the user tasks are going to finish return Ok(()); } - log::info!( + tracing::info!( "The task queue is almost full. Deleting the oldest {} finished tasks.", to_delete.len() ); diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 1b54e77c0..965d2e672 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -310,6 +310,8 @@ TooManyVectors , InvalidRequest , BAD_REQUEST ; UnretrievableDocument , Internal , BAD_REQUEST ; UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ; UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; + +// Experimental features VectorEmbeddingError , InvalidRequest , BAD_REQUEST } diff --git a/meilisearch-types/src/features.rs b/meilisearch-types/src/features.rs index d737c618e..04a5d9d6f 100644 --- a/meilisearch-types/src/features.rs +++ b/meilisearch-types/src/features.rs @@ -5,10 +5,12 @@ use serde::{Deserialize, Serialize}; pub struct RuntimeTogglableFeatures { pub vector_store: bool, pub metrics: bool, + pub logs_route: bool, pub export_puffin_reports: bool, } #[derive(Default, Debug, Clone, Copy)] pub struct InstanceTogglableFeatures { pub metrics: bool, + pub logs_route: bool, } diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 1d7f53229..7fbabba87 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -42,7 +42,6 @@ crossbeam-channel = "0.5.11" deserr = { version = "0.6.1", features = ["actix-web"] } dump = { path = "../dump" } either = "1.9.0" -env_logger = "0.10.1" file-store = { path = "../file-store" } flate2 = "1.0.28" fst = "0.4.7" @@ -55,7 +54,6 @@ is-terminal = "0.4.10" itertools = "0.11.0" jsonwebtoken = "8.3.0" lazy_static = "1.4.0" -log = "0.4.20" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } mimalloc = { version = "0.1.39", default-features = false } @@ -105,6 +103,10 @@ yaup = "0.2.1" serde_urlencoded = "0.7.1" termcolor = "1.4.1" url = { version = "2.5.0", features = ["serde"] } +tracing = "0.1.40" +tracing-subscriber = "0.3.18" +tracing-trace = { version = "0.1.0", path = "../tracing-trace" } +tracing-actix-web = "0.7.9" [dev-dependencies] actix-rt = "2.9.0" diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 6969c9b3d..a38ddaab2 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -250,6 +250,7 @@ impl super::Analytics for SegmentAnalytics { struct Infos { env: String, experimental_enable_metrics: bool, + experimental_enable_logs_route: bool, experimental_reduce_indexing_memory_usage: bool, experimental_max_number_of_batched_tasks: usize, db_path: bool, @@ -287,6 +288,7 @@ impl From for Infos { let Opt { db_path, experimental_enable_metrics, + experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, experimental_max_number_of_batched_tasks, http_addr, @@ -333,6 +335,7 @@ impl From for Infos { Self { env, experimental_enable_metrics, + experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, db_path: db_path != PathBuf::from("./data.ms"), import_dump: import_dump.is_some(), diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 3bd8f3edd..a8351fd1f 100644 --- a/meilisearch/src/error.rs 
+++ b/meilisearch/src/error.rs @@ -12,6 +12,8 @@ pub enum MeilisearchHttpError { #[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}", .0.iter().map(|s| format!("`{}`", s)).collect::>().join(", "))] MissingContentType(Vec), + #[error("The `/logs/stream` route is currently in use by someone else.")] + AlreadyUsedLogRoute, #[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")] CsvDelimiterWithWrongContentType(String), #[error( @@ -59,6 +61,7 @@ impl ErrorCode for MeilisearchHttpError { fn error_code(&self) -> Code { match self { MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType, + MeilisearchHttpError::AlreadyUsedLogRoute => Code::BadRequest, MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType, MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload, MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType, diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index f1111962c..c43a32cdc 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -29,7 +29,6 @@ use error::PayloadError; use extractors::payload::PayloadConfig; use http::header::CONTENT_TYPE; use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; -use log::error; use meilisearch_auth::AuthController; use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod}; @@ -39,6 +38,8 @@ use meilisearch_types::versioning::{check_version_file, create_version_file}; use meilisearch_types::{compression, milli, VERSION_FILE_NAME}; pub use option::Opt; use option::ScheduleSnapshot; +use tracing::{error, info_span}; +use tracing_subscriber::filter::Targets; use crate::error::MeilisearchHttpError; @@ -86,10 +87,21 @@ fn is_empty_db(db_path: impl AsRef) -> bool { } } +/// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`. 
+pub type LogRouteHandle = + tracing_subscriber::reload::Handle; + +pub type LogRouteType = tracing_subscriber::filter::Filtered< + Option + Send + Sync>>, + Targets, + tracing_subscriber::Registry, +>; + pub fn create_app( index_scheduler: Data, auth_controller: Data, opt: Opt, + logs: LogRouteHandle, analytics: Arc, enable_dashboard: bool, ) -> actix_web::App< @@ -108,6 +120,7 @@ pub fn create_app( index_scheduler.clone(), auth_controller.clone(), &opt, + logs, analytics.clone(), ) }) @@ -123,11 +136,49 @@ pub fn create_app( .allow_any_method() .max_age(86_400), // 24h ) - .wrap(actix_web::middleware::Logger::default()) + .wrap(tracing_actix_web::TracingLogger::::new()) .wrap(actix_web::middleware::Compress::default()) .wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim)) } +struct AwebTracingLogger; + +impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger { + fn on_request_start(request: &actix_web::dev::ServiceRequest) -> tracing::Span { + use tracing::field::Empty; + + let conn_info = request.connection_info(); + let headers = request.headers(); + let user_agent = headers + .get(http::header::USER_AGENT) + .map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned()) + .unwrap_or_default(); + info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty) + } + + fn on_request_end( + span: tracing::Span, + outcome: &Result, actix_web::Error>, + ) { + match &outcome { + Ok(response) => { + let code: i32 = response.response().status().as_u16().into(); + span.record("status_code", code); + + if let Some(error) = response.response().error() { + // use the status code already constructed for the outgoing HTTP response + span.record("error", &tracing::field::display(error.as_response_error())); + } + } + Err(error) => { + let code: i32 = error.error_response().status().as_u16().into(); + span.record("status_code", code); + span.record("error", &tracing::field::display(error.as_response_error())); + } + }; + } +} + enum OnFailure { RemoveDb, KeepDb, @@ -280,15 +331,15 @@ fn import_dump( let mut dump_reader = dump::DumpReader::open(reader)?; if let Some(date) = dump_reader.date() { - log::info!( - "Importing a dump of meilisearch `{:?}` from the {}", - dump_reader.version(), // TODO: get the meilisearch version instead of the dump version - date + tracing::info!( + version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version + %date, + "Importing a dump of meilisearch" ); } else { - log::info!( - "Importing a dump of meilisearch `{:?}`", - dump_reader.version(), // TODO: get the meilisearch version instead of the dump version + tracing::info!( + version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version + "Importing a dump of meilisearch", ); } @@ -322,7 +373,7 @@ fn import_dump( for index_reader in dump_reader.indexes()? { let mut index_reader = index_reader?; let metadata = index_reader.metadata(); - log::info!("Importing index `{}`.", metadata.uid); + tracing::info!("Importing index `{}`.", metadata.uid); let date = Some((metadata.created_at, metadata.updated_at)); let index = index_scheduler.create_raw_index(&metadata.uid, date)?; @@ -336,14 +387,15 @@ fn import_dump( } // 4.2 Import the settings. 
- log::info!("Importing the settings."); + tracing::info!("Importing the settings."); let settings = index_reader.settings()?; apply_settings_to_builder(&settings, &mut builder); - builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?; + builder + .execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?; // 4.3 Import the documents. // 4.3.1 We need to recreate the grenad+obkv format accepted by the index. - log::info!("Importing the documents."); + tracing::info!("Importing the documents."); let file = tempfile::tempfile()?; let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file)); for document in index_reader.documents()? { @@ -365,15 +417,16 @@ fn import_dump( update_method: IndexDocumentsMethod::ReplaceDocuments, ..Default::default() }, - |indexing_step| log::trace!("update: {:?}", indexing_step), + |indexing_step| tracing::trace!("update: {:?}", indexing_step), || false, )?; let (builder, user_result) = builder.add_documents(reader)?; - log::info!("{} documents found.", user_result?); + let user_result = user_result?; + tracing::info!(documents_found = user_result, "{} documents found.", user_result); builder.execute()?; wtxn.commit()?; - log::info!("All documents successfully imported."); + tracing::info!("All documents successfully imported."); } let mut index_scheduler_dump = index_scheduler.register_dumped_task()?; @@ -391,6 +444,7 @@ pub fn configure_data( index_scheduler: Data, auth: Data, opt: &Opt, + logs: LogRouteHandle, analytics: Arc, ) { let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; @@ -398,6 +452,7 @@ pub fn configure_data( .app_data(index_scheduler) .app_data(auth) .app_data(web::Data::from(analytics)) + .app_data(web::Data::new(logs)) .app_data( web::JsonConfig::default() .limit(http_payload_size_limit) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index ddd37bbb6..ed18fb97e 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -1,6 +1,7 @@ use std::env; use std::io::{stderr, Write}; use std::path::PathBuf; +use std::str::FromStr; use std::sync::Arc; use actix_web::http::KeepAlive; @@ -9,37 +10,60 @@ use actix_web::HttpServer; use index_scheduler::IndexScheduler; use is_terminal::IsTerminal; use meilisearch::analytics::Analytics; -use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt}; +use meilisearch::{ + analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, LogRouteType, Opt, +}; use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE}; +use mimalloc::MiMalloc; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::layer::SubscriberExt as _; +use tracing_subscriber::Layer; #[global_allocator] -static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; +static ALLOC: MiMalloc = MiMalloc; + +fn default_layer() -> LogRouteType { + None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF)) +} /// does all the setup before meilisearch is launched -fn setup(opt: &Opt) -> anyhow::Result<()> { - let mut log_builder = env_logger::Builder::new(); - let log_filters = format!( - "{},h2=warn,hyper=warn,tokio_util=warn,tracing=warn,rustls=warn,mio=warn,reqwest=warn", - opt.log_level +fn setup(opt: &Opt) -> anyhow::Result { + let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(default_layer()); + let route_layer: 
tracing_subscriber::reload::Layer<_, _> = route_layer; + + let subscriber = tracing_subscriber::registry().with(route_layer).with( + tracing_subscriber::fmt::layer() + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE) + .with_filter( + tracing_subscriber::filter::LevelFilter::from_str(&opt.log_level.to_string()) + .unwrap(), + ), ); - log_builder.parse_filters(&log_filters); - log_builder.init(); + // set the subscriber as the default for the application + tracing::subscriber::set_global_default(subscriber).unwrap(); - Ok(()) + Ok(route_layer_handle) +} + +fn on_panic(info: &std::panic::PanicInfo) { + let info = info.to_string().replace('\n', " "); + tracing::error!(%info); } #[actix_web::main] async fn main() -> anyhow::Result<()> { let (opt, config_read_from) = Opt::try_build()?; + std::panic::set_hook(Box::new(on_panic)); + anyhow::ensure!( !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage), "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows" ); - setup(&opt)?; + let log_handle = setup(&opt)?; match (opt.env.as_ref(), &opt.master_key) { ("production", Some(master_key)) if master_key.len() < MASTER_KEY_MIN_SIZE => { @@ -77,7 +101,7 @@ async fn main() -> anyhow::Result<()> { print_launch_resume(&opt, analytics.clone(), config_read_from); - run_http(index_scheduler, auth_controller, opt, analytics).await?; + run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?; Ok(()) } @@ -86,6 +110,7 @@ async fn run_http( index_scheduler: Arc, auth_controller: Arc, opt: Opt, + logs: LogRouteHandle, analytics: Arc, ) -> anyhow::Result<()> { let enable_dashboard = &opt.env == "development"; @@ -98,6 +123,7 @@ async fn run_http( index_scheduler.clone(), auth_controller.clone(), opt.clone(), + logs.clone(), analytics.clone(), enable_dashboard, ) diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 3cff14aa0..9586a3f6f 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -51,6 +51,7 @@ const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP"; const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR"; const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; +const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE"; @@ -309,6 +310,13 @@ pub struct Opt { #[serde(default)] pub experimental_enable_metrics: bool, + /// Experimental logs route feature. For more information, see: + /// + /// Enables the log route on the `POST /logs/stream` endpoint and the `DELETE /logs/stream` to stop receiving logs. 
+ #[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)] + #[serde(default)] + pub experimental_enable_logs_route: bool, + /// Experimental RAM reduction during indexing, do not use in production, see: #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)] #[serde(default)] @@ -414,6 +422,7 @@ impl Opt { #[cfg(feature = "analytics")] no_analytics, experimental_enable_metrics, + experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, } = self; export_to_env_if_not_present(MEILI_DB_PATH, db_path); @@ -470,6 +479,10 @@ impl Opt { MEILI_EXPERIMENTAL_ENABLE_METRICS, experimental_enable_metrics.to_string(), ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE, + experimental_enable_logs_route.to_string(), + ); export_to_env_if_not_present( MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE, experimental_reduce_indexing_memory_usage.to_string(), @@ -524,7 +537,10 @@ impl Opt { } pub(crate) fn to_instance_features(&self) -> InstanceTogglableFeatures { - InstanceTogglableFeatures { metrics: self.experimental_enable_metrics } + InstanceTogglableFeatures { + metrics: self.experimental_enable_metrics, + logs_route: self.experimental_enable_logs_route, + } } } diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs index 0aabd2aa6..071ae60b8 100644 --- a/meilisearch/src/routes/dump.rs +++ b/meilisearch/src/routes/dump.rs @@ -1,11 +1,11 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_auth::AuthController; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; use serde_json::json; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; @@ -32,6 +32,6 @@ pub async fn create_dump( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Create dump"); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 0a7e73ac6..227b485c5 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -3,11 +3,11 @@ use actix_web::{HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use deserr::Deserr; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; use serde_json::json; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::ActionPolicy; @@ -33,8 +33,9 @@ async fn get_features( let features = index_scheduler.features(); analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req)); - debug!("returns: {:?}", features.runtime_features()); - HttpResponse::Ok().json(features.runtime_features()) + let features = features.runtime_features(); + debug!(returns = ?features, "Get features"); + HttpResponse::Ok().json(features) } #[derive(Debug, Deserr)] @@ -45,6 +46,8 @@ pub struct RuntimeTogglableFeatures { #[deserr(default)] pub metrics: Option, #[deserr(default)] + pub logs_route: Option, + #[deserr(default)] pub export_puffin_reports: Option, } @@ -58,11 +61,13 @@ async fn patch_features( analytics: Data, ) -> Result { let features = index_scheduler.features(); + debug!(parameters = ?new_features, "Patch features"); let old_features 
= features.runtime_features(); let new_features = meilisearch_types::features::RuntimeTogglableFeatures { vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store), metrics: new_features.0.metrics.unwrap_or(old_features.metrics), + logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route), export_puffin_reports: new_features .0 .export_puffin_reports @@ -75,6 +80,7 @@ async fn patch_features( let meilisearch_types::features::RuntimeTogglableFeatures { vector_store, metrics, + logs_route, export_puffin_reports, } = new_features; @@ -83,10 +89,12 @@ async fn patch_features( json!({ "vector_store": vector_store, "metrics": metrics, + "logs_route": logs_route, "export_puffin_reports": export_puffin_reports, }), Some(&req), ); index_scheduler.put_runtime_features(new_features)?; + debug!(returns = ?new_features, "Patch features"); Ok(HttpResponse::Ok().json(new_features)) } diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 6d59f60dd..1f41fa10c 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -8,7 +8,6 @@ use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::Deserr; use futures::StreamExt; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType}; @@ -28,6 +27,7 @@ use serde_json::Value; use tempfile::tempfile; use tokio::fs::File; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; +use tracing::debug; use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind}; use crate::error::MeilisearchHttpError; @@ -101,6 +101,7 @@ pub async fn get_document( analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = document_param.into_inner(); + debug!(parameters = ?params, "Get document"); let index_uid = IndexUid::try_from(index_uid)?; analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req); @@ -110,7 +111,7 @@ pub async fn get_document( let index = index_scheduler.index(&index_uid)?; let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?; - debug!("returns: {:?}", document); + debug!(returns = ?document, "Get document"); Ok(HttpResponse::Ok().json(document)) } @@ -131,7 +132,7 @@ pub async fn delete_document( }; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Delete document"); Ok(HttpResponse::Accepted().json(task)) } @@ -168,9 +169,8 @@ pub async fn documents_by_query_post( req: HttpRequest, analytics: web::Data, ) -> Result { - debug!("called with body: {:?}", body); - let body = body.into_inner(); + debug!(parameters = ?body, "Get documents POST"); analytics.post_fetch_documents( &DocumentFetchKind::Normal { @@ -191,7 +191,7 @@ pub async fn get_documents( req: HttpRequest, analytics: web::Data, ) -> Result { - debug!("called with params: {:?}", params); + debug!(parameters = ?params, "Get documents GET"); let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner(); @@ -235,7 +235,7 @@ fn documents_by_query( let ret = PaginationView::new(offset, limit, total as usize, documents); - debug!("returns: {:?}", ret); + debug!(returns = ?ret, "Get documents"); Ok(HttpResponse::Ok().json(ret)) } @@ -271,7 +271,7 
@@ pub async fn replace_documents( ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; - debug!("called with params: {:?}", params); + debug!(parameters = ?params, "Replace documents"); let params = params.into_inner(); analytics.add_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); @@ -288,6 +288,7 @@ pub async fn replace_documents( allow_index_creation, ) .await?; + debug!(returns = ?task, "Replace documents"); Ok(HttpResponse::Accepted().json(task)) } @@ -302,8 +303,8 @@ pub async fn update_documents( ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; - debug!("called with params: {:?}", params); let params = params.into_inner(); + debug!(parameters = ?params, "Update documents"); analytics.update_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); @@ -319,6 +320,7 @@ pub async fn update_documents( allow_index_creation, ) .await?; + debug!(returns = ?task, "Update documents"); Ok(HttpResponse::Accepted().json(task)) } @@ -427,7 +429,10 @@ async fn document_addition( Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e))) if e.kind() == ErrorKind::NotFound => {} Err(e) => { - log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}"); + tracing::warn!( + index_uuid = %uuid, + "Unknown error happened while deleting a malformed update file: {e}" + ); } } // We still want to return the original error to the end user. @@ -453,7 +458,6 @@ async fn document_addition( } }; - debug!("returns: {:?}", task); Ok(task.into()) } @@ -464,7 +468,7 @@ pub async fn delete_documents_batch( req: HttpRequest, analytics: web::Data, ) -> Result { - debug!("called with params: {:?}", body); + debug!(parameters = ?body, "Delete documents by batch"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; analytics.delete_documents(DocumentDeletionKind::PerBatch, &req); @@ -479,7 +483,7 @@ pub async fn delete_documents_batch( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Delete documents by batch"); Ok(HttpResponse::Accepted().json(task)) } @@ -497,7 +501,7 @@ pub async fn delete_documents_by_filter( req: HttpRequest, analytics: web::Data, ) -> Result { - debug!("called with params: {:?}", body); + debug!(parameters = ?body, "Delete documents by filter"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = index_uid.into_inner(); let filter = body.into_inner().filter; @@ -515,7 +519,7 @@ pub async fn delete_documents_by_filter( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Delete documents by filter"); Ok(HttpResponse::Accepted().json(task)) } @@ -532,7 +536,7 @@ pub async fn clear_all_documents( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Delete all documents"); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 4b5d4d78a..a980fb278 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -2,12 +2,12 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; 
use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use serde_json::Value; +use tracing::debug; use crate::analytics::{Analytics, FacetSearchAggregator}; use crate::extractors::authentication::policies::*; @@ -56,7 +56,7 @@ pub async fn search( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let query = params.into_inner(); - debug!("facet search called with params: {:?}", query); + debug!(parameters = ?query, "Facet search"); let mut aggregate = FacetSearchAggregator::from_query(&query, &req); @@ -83,7 +83,7 @@ pub async fn search( let search_result = search_result?; - debug!("returns: {:?}", search_result); + debug!(returns = ?search_result, "Facet search"); Ok(HttpResponse::Ok().json(search_result)) } diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 81b5c3f2e..d80bd9c61 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -5,7 +5,6 @@ use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::{DeserializeError, Deserr, ValuePointerRef}; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::error::deserr_codes::*; @@ -16,6 +15,7 @@ use meilisearch_types::tasks::KindWithContent; use serde::Serialize; use serde_json::json; use time::OffsetDateTime; +use tracing::debug; use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; use crate::analytics::Analytics; @@ -93,6 +93,7 @@ pub async fn list_indexes( index_scheduler: GuardedData, Data>, paginate: AwebQueryParameter, ) -> Result { + debug!(parameters = ?paginate, "List indexes"); let filters = index_scheduler.filters(); let indexes: Vec> = index_scheduler.try_for_each_index(|uid, index| -> Result, _> { @@ -105,7 +106,7 @@ pub async fn list_indexes( let indexes: Vec = indexes.into_iter().flatten().collect(); let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter()); - debug!("returns: {:?}", ret); + debug!(returns = ?ret, "List indexes"); Ok(HttpResponse::Ok().json(ret)) } @@ -124,6 +125,7 @@ pub async fn create_index( req: HttpRequest, analytics: web::Data, ) -> Result { + debug!(parameters = ?body, "Create index"); let IndexCreateRequest { primary_key, uid } = body.into_inner(); let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid); @@ -137,6 +139,7 @@ pub async fn create_index( let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key }; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + debug!(returns = ?task, "Create index"); Ok(HttpResponse::Accepted().json(task)) } else { @@ -177,7 +180,7 @@ pub async fn get_index( let index = index_scheduler.index(&index_uid)?; let index_view = IndexView::new(index_uid.into_inner(), &index)?; - debug!("returns: {:?}", index_view); + debug!(returns = ?index_view, "Get index"); Ok(HttpResponse::Ok().json(index_view)) } @@ -189,7 +192,7 @@ pub async fn update_index( req: HttpRequest, analytics: web::Data, ) -> Result { - debug!("called with params: {:?}", body); + debug!(parameters = ?body, "Update index"); let index_uid = 
IndexUid::try_from(index_uid.into_inner())?; let body = body.into_inner(); analytics.publish( @@ -206,7 +209,7 @@ pub async fn update_index( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Update index"); Ok(HttpResponse::Accepted().json(task)) } @@ -218,6 +221,7 @@ pub async fn delete_index( let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() }; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + debug!(returns = ?task, "Delete index"); Ok(HttpResponse::Accepted().json(task)) } @@ -255,6 +259,6 @@ pub async fn get_index_stats( let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?); - debug!("returns: {:?}", stats); + debug!(returns = ?stats, "Get index stats"); Ok(HttpResponse::Ok().json(stats)) } diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index c474d285e..3adfce970 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -2,7 +2,6 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use index_scheduler::IndexScheduler; -use log::{debug, warn}; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::error::deserr_codes::*; @@ -12,6 +11,7 @@ use meilisearch_types::milli; use meilisearch_types::milli::vector::DistributionShift; use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; +use tracing::{debug, warn}; use crate::analytics::{Analytics, SearchAggregator}; use crate::extractors::authentication::policies::*; @@ -186,7 +186,7 @@ pub async fn search_with_url_query( req: HttpRequest, analytics: web::Data, ) -> Result { - debug!("called with params: {:?}", params); + debug!(parameters = ?params, "Search get"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let mut query: SearchQuery = params.into_inner().into(); @@ -213,7 +213,7 @@ pub async fn search_with_url_query( let search_result = search_result?; - debug!("returns: {:?}", search_result); + debug!(returns = ?search_result, "Search get"); Ok(HttpResponse::Ok().json(search_result)) } @@ -227,7 +227,7 @@ pub async fn search_with_post( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let mut query = params.into_inner(); - debug!("search called with params: {:?}", query); + debug!(parameters = ?query, "Search post"); // Tenant token search_rules. 
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { @@ -252,7 +252,7 @@ pub async fn search_with_post( let search_result = search_result?; - debug!("returns: {:?}", search_result); + debug!(returns = ?search_result, "Search post"); Ok(HttpResponse::Ok().json(search_result)) } diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index decc4ffc9..23e8925c7 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -2,7 +2,6 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::facet_values_sort::FacetValuesSort; @@ -11,6 +10,7 @@ use meilisearch_types::milli::update::Setting; use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked}; use meilisearch_types::tasks::KindWithContent; use serde_json::json; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; @@ -24,12 +24,12 @@ macro_rules! make_setting_route { use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse, Resource}; use index_scheduler::IndexScheduler; - use log::debug; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::update::Setting; use meilisearch_types::settings::{settings, Settings}; use meilisearch_types::tasks::KindWithContent; + use tracing::debug; use $crate::analytics::Analytics; use $crate::extractors::authentication::policies::*; use $crate::extractors::authentication::GuardedData; @@ -61,7 +61,7 @@ macro_rules! make_setting_route { .await?? .into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Delete settings"); Ok(HttpResponse::Accepted().json(task)) } @@ -78,6 +78,7 @@ macro_rules! make_setting_route { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let body = body.into_inner(); + debug!(parameters = ?body, "Update settings"); #[allow(clippy::redundant_closure_call)] $analytics(&body, &req); @@ -109,7 +110,7 @@ macro_rules! make_setting_route { .await?? .into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Update settings"); Ok(HttpResponse::Accepted().json(task)) } @@ -126,7 +127,7 @@ macro_rules! 
make_setting_route { let rtxn = index.read_txn()?; let settings = settings(&index, &rtxn)?; - debug!("returns: {:?}", settings); + debug!(returns = ?settings, "Update settings"); let mut json = serde_json::json!(&settings); let val = json[$camelcase_attr].take(); @@ -656,6 +657,7 @@ pub async fn update_all( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let new_settings = body.into_inner(); + debug!(parameters = ?new_settings, "Update all settings"); let new_settings = validate_settings(new_settings, &index_scheduler)?; analytics.publish( @@ -768,7 +770,7 @@ pub async fn update_all( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Update all settings"); Ok(HttpResponse::Accepted().json(task)) } @@ -781,7 +783,7 @@ pub async fn get_all( let index = index_scheduler.index(&index_uid)?; let rtxn = index.read_txn()?; let new_settings = settings(&index, &rtxn)?; - debug!("returns: {:?}", new_settings); + debug!(returns = ?new_settings, "Get all settings"); Ok(HttpResponse::Ok().json(new_settings)) } @@ -804,7 +806,7 @@ pub async fn delete_all( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Delete all settings"); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs new file mode 100644 index 000000000..d95f80bb8 --- /dev/null +++ b/meilisearch/src/routes/logs.rs @@ -0,0 +1,281 @@ +use std::convert::Infallible; +use std::io::Write; +use std::ops::ControlFlow; +use std::pin::Pin; +use std::str::FromStr; +use std::sync::Arc; + +use actix_web::web::{Bytes, Data}; +use actix_web::{web, HttpResponse}; +use deserr::actix_web::AwebJson; +use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef}; +use futures_util::Stream; +use index_scheduler::IndexScheduler; +use meilisearch_types::deserr::DeserrJsonError; +use meilisearch_types::error::deserr_codes::*; +use meilisearch_types::error::{Code, ResponseError}; +use tokio::sync::mpsc; +use tracing_subscriber::filter::Targets; +use tracing_subscriber::Layer; + +use crate::error::MeilisearchHttpError; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; +use crate::extractors::sequential_extractor::SeqHandler; +use crate::LogRouteHandle; + +pub fn configure(cfg: &mut web::ServiceConfig) { + cfg.service( + web::resource("stream") + .route(web::post().to(SeqHandler(get_logs))) + .route(web::delete().to(SeqHandler(cancel_logs))), + ); +} + +#[derive(Debug, Default, Clone, Copy, Deserr, PartialEq, Eq)] +#[deserr(rename_all = camelCase)] +pub enum LogMode { + #[default] + Human, + Profile, +} + +/// Simple wrapper around the `Targets` from `tracing_subscriber` to implement `MergeWithError` on it. +#[derive(Clone, Debug)] +struct MyTargets(Targets); + +/// Simple wrapper around the `ParseError` from `tracing_subscriber` to implement `MergeWithError` on it. +#[derive(Debug, thiserror::Error)] +enum MyParseError { + #[error(transparent)] + ParseError(#[from] tracing_subscriber::filter::ParseError), + #[error( + "Empty string is not a valid target. If you want to get no logs use `OFF`. 
Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`" + )] + Example, +} + +impl FromStr for MyTargets { + type Err = MyParseError; + + fn from_str(s: &str) -> Result { + if s.is_empty() { + Err(MyParseError::Example) + } else { + Ok(MyTargets(Targets::from_str(s).map_err(MyParseError::ParseError)?)) + } + } +} + +impl MergeWithError for DeserrJsonError { + fn merge( + _self_: Option, + other: MyParseError, + merge_location: ValuePointerRef, + ) -> ControlFlow { + Self::error::( + None, + ErrorKind::Unexpected { msg: other.to_string() }, + merge_location, + ) + } +} + +#[derive(Debug, Deserr)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields, validate = validate_get_logs -> DeserrJsonError)] +pub struct GetLogs { + #[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError)] + target: MyTargets, + + #[deserr(default, error = DeserrJsonError)] + mode: LogMode, + + #[deserr(default = false, error = DeserrJsonError)] + profile_memory: bool, +} + +fn validate_get_logs( + logs: GetLogs, + location: ValuePointerRef, +) -> Result { + if logs.profile_memory && logs.mode != LogMode::Profile { + Err(deserr::take_cf_content(E::error::( + None, + ErrorKind::Unexpected { + msg: format!("`profile_memory` can only be used while profiling code and is not compatible with the {:?} mode.", logs.mode), + }, + location, + ))) + } else { + Ok(logs) + } +} + +struct LogWriter { + sender: mpsc::UnboundedSender>, +} + +impl Write for LogWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.sender.send(buf.to_vec()).map_err(std::io::Error::other)?; + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +struct HandleGuard { + /// We need to keep an handle on the logs to make it available again when the streamer is dropped + logs: Arc, +} + +impl Drop for HandleGuard { + fn drop(&mut self) { + if let Err(e) = self.logs.modify(|layer| *layer.inner_mut() = None) { + tracing::error!("Could not free the logs route: {e}"); + } + } +} + +fn byte_stream( + receiver: mpsc::UnboundedReceiver>, + guard: HandleGuard, +) -> impl futures_util::Stream> { + futures_util::stream::unfold((receiver, guard), move |(mut receiver, guard)| async move { + let vec = receiver.recv().await; + + vec.map(From::from).map(Ok).map(|a| (a, (receiver, guard))) + }) +} + +type PinnedByteStream = Pin>>>; + +fn make_layer< + S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>, +>( + opt: &GetLogs, + logs: Data, +) -> (Box + Send + Sync>, PinnedByteStream) { + let guard = HandleGuard { logs: logs.into_inner() }; + match opt.mode { + LogMode::Human => { + let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); + + let fmt_layer = tracing_subscriber::fmt::layer() + .with_writer(move || LogWriter { sender: sender.clone() }) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE); + + let stream = byte_stream(receiver, guard); + (Box::new(fmt_layer) as Box + Send + Sync>, Box::pin(stream)) + } + LogMode::Profile => { + let (trace, layer) = tracing_trace::Trace::new(opt.profile_memory); + + let stream = entry_stream(trace, guard); + + (Box::new(layer) as Box + Send + Sync>, Box::pin(stream)) + } + } +} + +fn entry_stream( + trace: tracing_trace::Trace, + guard: HandleGuard, +) -> impl Stream> { + let receiver = trace.into_receiver(); + let entry_buf = Vec::new(); + + futures_util::stream::unfold( + (receiver, 
entry_buf, guard), + move |(mut receiver, mut entry_buf, guard)| async move { + let mut bytes = Vec::new(); + + while bytes.len() < 8192 { + entry_buf.clear(); + + let Ok(count) = tokio::time::timeout( + std::time::Duration::from_secs(1), + receiver.recv_many(&mut entry_buf, 100), + ) + .await + else { + break; + }; + + if count == 0 { + if !bytes.is_empty() { + break; + } + + // channel closed, exit + return None; + } + + for entry in &entry_buf { + if let Err(error) = serde_json::to_writer(&mut bytes, entry) { + tracing::error!( + error = &error as &dyn std::error::Error, + "deserializing entry" + ); + return Some(( + Err(ResponseError::from_msg( + format!("error deserializing entry: {error}"), + Code::Internal, + )), + (receiver, entry_buf, guard), + )); + } + } + } + + Some((Ok(bytes.into()), (receiver, entry_buf, guard))) + }, + ) +} + +pub async fn get_logs( + index_scheduler: GuardedData, Data>, + logs: Data, + body: AwebJson, +) -> Result { + index_scheduler.features().check_logs_route()?; + + let opt = body.into_inner(); + let mut stream = None; + + logs.modify(|layer| match layer.inner_mut() { + None => { + // there is no one getting logs + *layer.filter_mut() = opt.target.0.clone(); + let (new_layer, new_stream) = make_layer(&opt, logs.clone()); + + *layer.inner_mut() = Some(new_layer); + stream = Some(new_stream); + } + Some(_) => { + // there is already someone getting logs + } + }) + .unwrap(); + + if let Some(stream) = stream { + Ok(HttpResponse::Ok().streaming(stream)) + } else { + Err(MeilisearchHttpError::AlreadyUsedLogRoute.into()) + } +} + +pub async fn cancel_logs( + index_scheduler: GuardedData, Data>, + logs: Data, +) -> Result { + index_scheduler.features().check_logs_route()?; + + if let Err(e) = logs.modify(|layer| *layer.inner_mut() = None) { + tracing::error!("Could not free the logs route: {e}"); + } + + Ok(HttpResponse::NoContent().finish()) +} diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index 0e5623b09..89cf63c50 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -3,7 +3,6 @@ use std::collections::BTreeMap; use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_auth::AuthController; use meilisearch_types::error::ResponseError; use meilisearch_types::settings::{Settings, Unchecked}; @@ -11,6 +10,7 @@ use meilisearch_types::tasks::{Kind, Status, Task, TaskId}; use serde::{Deserialize, Serialize}; use serde_json::json; use time::OffsetDateTime; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; @@ -22,6 +22,7 @@ mod api_key; mod dump; pub mod features; pub mod indexes; +mod logs; mod metrics; mod multi_search; mod snapshot; @@ -31,6 +32,7 @@ pub mod tasks; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::scope("/tasks").configure(tasks::configure)) .service(web::resource("/health").route(web::get().to(get_health))) + .service(web::scope("/logs").configure(logs::configure)) .service(web::scope("/keys").configure(api_key::configure)) .service(web::scope("/dumps").configure(dump::configure)) .service(web::scope("/snapshots").configure(snapshot::configure)) @@ -250,7 +252,7 @@ async fn get_stats( let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?; - debug!("returns: {:?}", stats); + debug!(returns = ?stats, "Get stats"); Ok(HttpResponse::Ok().json(stats)) } diff --git 
a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index 8e81688e6..86aa58e70 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -3,11 +3,11 @@ use actix_web::web::{self, Data}; use actix_web::{HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; use serde::Serialize; +use tracing::debug; use crate::analytics::{Analytics, MultiSearchAggregator}; use crate::extractors::authentication::policies::ActionPolicy; @@ -52,7 +52,7 @@ pub async fn multi_search_with_post( for (query_index, (index_uid, mut query)) in queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate() { - debug!("multi-search #{query_index}: called with params: {:?}", query); + debug!(on_index = query_index, parameters = ?query, "Multi-search"); // Check index from API key if !index_scheduler.filters().is_index_authorized(&index_uid) { @@ -107,7 +107,7 @@ pub async fn multi_search_with_post( err })?; - debug!("returns: {:?}", search_results); + debug!(returns = ?search_results, "Multi-search"); Ok(HttpResponse::Ok().json(SearchResults { results: search_results })) } diff --git a/meilisearch/src/routes/snapshot.rs b/meilisearch/src/routes/snapshot.rs index 7fa22658a..c94529932 100644 --- a/meilisearch/src/routes/snapshot.rs +++ b/meilisearch/src/routes/snapshot.rs @@ -1,10 +1,10 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; use serde_json::json; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; @@ -27,6 +27,6 @@ pub async fn create_snapshot( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Create snapshot"); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/tests/auth/authorization.rs b/meilisearch/tests/auth/authorization.rs index af028060d..d26bb26b8 100644 --- a/meilisearch/tests/auth/authorization.rs +++ b/meilisearch/tests/auth/authorization.rs @@ -59,6 +59,8 @@ pub static AUTHORIZATIONS: Lazy hashset!{"snapshots.create", "snapshots.*", "*"}, ("GET", "/version") => hashset!{"version", "*"}, ("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"}, + ("POST", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"}, + ("DELETE", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"}, ("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"}, ("GET", "/keys/mykey/") => hashset!{"keys.get", "*"}, ("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"}, diff --git a/meilisearch/tests/common/server.rs b/meilisearch/tests/common/server.rs index 27feb187f..134124cc8 100644 --- a/meilisearch/tests/common/server.rs +++ b/meilisearch/tests/common/server.rs @@ -13,6 +13,8 @@ use meilisearch::{analytics, create_app, setup_meilisearch}; use once_cell::sync::Lazy; use tempfile::TempDir; use tokio::time::sleep; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::Layer; use super::index::Index; use super::service::Service; @@ -81,10 +83,16 @@ impl Server { Response = ServiceResponse, Error = actix_web::Error, > { + let (_route_layer, 
route_layer_handle) = + tracing_subscriber::reload::Layer::new(None.with_filter( + tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF), + )); + actix_web::test::init_service(create_app( self.service.index_scheduler.clone().into(), self.service.auth.clone().into(), self.service.options.clone(), + route_layer_handle, analytics::MockAnalytics::new(&self.service.options), true, )) diff --git a/meilisearch/tests/common/service.rs b/meilisearch/tests/common/service.rs index 078ddb6e5..4c23a18d8 100644 --- a/meilisearch/tests/common/service.rs +++ b/meilisearch/tests/common/service.rs @@ -7,6 +7,8 @@ use actix_web::test::TestRequest; use index_scheduler::IndexScheduler; use meilisearch::{analytics, create_app, Opt}; use meilisearch_auth::AuthController; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::Layer; use crate::common::encoder::Encoder; use crate::common::Value; @@ -105,10 +107,16 @@ impl Service { } pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) { + let (_route_layer, route_layer_handle) = + tracing_subscriber::reload::Layer::new(None.with_filter( + tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF), + )); + let app = test::init_service(create_app( self.index_scheduler.clone().into(), self.auth.clone().into(), self.options.clone(), + route_layer_handle, analytics::MockAnalytics::new(&self.options), true, )) diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs index 632180aac..e8061ae4a 100644 --- a/meilisearch/tests/dumps/mod.rs +++ b/meilisearch/tests/dumps/mod.rs @@ -1847,6 +1847,7 @@ async fn import_dump_v6_containing_experimental_features() { { "vectorStore": false, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); diff --git a/meilisearch/tests/features/mod.rs b/meilisearch/tests/features/mod.rs index a8147f111..3a9812f30 100644 --- a/meilisearch/tests/features/mod.rs +++ b/meilisearch/tests/features/mod.rs @@ -20,6 +20,7 @@ async fn experimental_features() { { "vectorStore": false, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -31,6 +32,7 @@ async fn experimental_features() { { "vectorStore": true, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -42,6 +44,7 @@ async fn experimental_features() { { "vectorStore": true, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -54,6 +57,7 @@ async fn experimental_features() { { "vectorStore": true, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -66,6 +70,7 @@ async fn experimental_features() { { "vectorStore": true, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -85,6 +90,7 @@ async fn experimental_feature_metrics() { { "vectorStore": false, "metrics": true, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -140,7 +146,7 @@ async fn errors() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `exportPuffinReports`", + "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`", "code": "bad_request", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#bad_request" diff --git a/meilisearch/tests/integration.rs b/meilisearch/tests/integration.rs index b6992791a..943af802a 100644 --- 
a/meilisearch/tests/integration.rs +++ b/meilisearch/tests/integration.rs @@ -5,6 +5,7 @@ mod documents; mod dumps; mod features; mod index; +mod logs; mod search; mod settings; mod snapshot; diff --git a/meilisearch/tests/logs/error.rs b/meilisearch/tests/logs/error.rs new file mode 100644 index 000000000..4f4d741e3 --- /dev/null +++ b/meilisearch/tests/logs/error.rs @@ -0,0 +1,182 @@ +use meili_snap::*; + +use crate::common::Server; +use crate::json; + +#[actix_rt::test] +async fn logs_stream_bad_target() { + let server = Server::new().await; + + // Wrong type + let (response, code) = server.service.post("/logs/stream", json!({ "target": true })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.target`: expected a string, but found a boolean: `true`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Wrong type + let (response, code) = server.service.post("/logs/stream", json!({ "target": [] })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.target`: expected a string, but found an array: `[]`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Our help message + let (response, code) = server.service.post("/logs/stream", json!({ "target": "" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value at `.target`: Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // An error from the target parser + let (response, code) = server.service.post("/logs/stream", json!({ "target": "==" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value at `.target`: invalid filter directive: too many '=' in filter directive, expected 0 or 1", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); +} + +#[actix_rt::test] +async fn logs_stream_bad_mode() { + let server = Server::new().await; + + // Wrong type + let (response, code) = server.service.post("/logs/stream", json!({ "mode": true })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.mode`: expected a string, but found a boolean: `true`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Wrong type + let (response, code) = server.service.post("/logs/stream", json!({ "mode": [] })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.mode`: expected a string, but found an array: `[]`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Wrong value + let (response, code) = server.service.post("/logs/stream", json!({ "mode": "tamo" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Unknown value `tamo` at `.mode`: expected one of `human`, `profile`", + "code": 
"bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); +} + +#[actix_rt::test] +async fn logs_stream_bad_profile_memory() { + let server = Server::new().await; + + // Wrong type + let (response, code) = + server.service.post("/logs/stream", json!({ "profileMemory": "tamo" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.profileMemory`: expected a boolean, but found a string: `\"tamo\"`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Wrong type + let (response, code) = + server.service.post("/logs/stream", json!({ "profileMemory": ["hello", "kefir"] })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.profileMemory`: expected a boolean, but found an array: `[\"hello\",\"kefir\"]`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Used with default parameters + let (response, code) = + server.service.post("/logs/stream", json!({ "profileMemory": true })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Human mode.", + "code": "invalid_settings_typo_tolerance", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance" + } + "###); + + // Used with an unsupported mode + let (response, code) = + server.service.post("/logs/stream", json!({ "mode": "fmt", "profileMemory": true })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Unknown value `fmt` at `.mode`: expected one of `human`, `profile`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); +} + +#[actix_rt::test] +async fn logs_stream_without_enabling_the_route() { + let server = Server::new().await; + + let (response, code) = server.service.post("/logs/stream", json!({})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); + + let (response, code) = server.service.delete("/logs/stream").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. 
See https://github.com/orgs/meilisearch/discussions/721", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); +} diff --git a/meilisearch/tests/logs/mod.rs b/meilisearch/tests/logs/mod.rs new file mode 100644 index 000000000..0002fe33c --- /dev/null +++ b/meilisearch/tests/logs/mod.rs @@ -0,0 +1,92 @@ +mod error; + +use std::rc::Rc; +use std::str::FromStr; + +use actix_web::http::header::ContentType; +use meili_snap::snapshot; +use meilisearch::{analytics, create_app, Opt}; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::Layer; + +use crate::common::{default_settings, Server}; +use crate::json; + +#[actix_web::test] +async fn basic_test_log_stream_route() { + let db_path = tempfile::tempdir().unwrap(); + let server = Server::new_with_options(Opt { + experimental_enable_logs_route: true, + ..default_settings(db_path.path()) + }) + .await + .unwrap(); + + let (route_layer, route_layer_handle) = + tracing_subscriber::reload::Layer::new(None.with_filter( + tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF), + )); + + let subscriber = tracing_subscriber::registry().with(route_layer).with( + tracing_subscriber::fmt::layer() + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) + .with_filter(tracing_subscriber::filter::LevelFilter::from_str("INFO").unwrap()), + ); + + let app = actix_web::test::init_service(create_app( + server.service.index_scheduler.clone().into(), + server.service.auth.clone().into(), + server.service.options.clone(), + route_layer_handle, + analytics::MockAnalytics::new(&server.service.options), + true, + )) + .await; + + // set the subscriber as the default for the application + tracing::subscriber::set_global_default(subscriber).unwrap(); + + let app = Rc::new(app); + + // First, we start listening on the `/logs/stream` route + let handle_app = app.clone(); + let handle = tokio::task::spawn_local(async move { + let req = actix_web::test::TestRequest::post() + .uri("/logs/stream") + .insert_header(ContentType::json()) + .set_payload( + serde_json::to_vec(&json!({ + "mode": "human", + "target": "info", + })) + .unwrap(), + ); + let req = req.to_request(); + let ret = actix_web::test::call_service(&*handle_app, req).await; + actix_web::test::read_body(ret).await + }); + + // We're going to create an index to get at least one info log saying we processed a batch of task + let (ret, _code) = server.create_index(json!({ "uid": "tamo" })).await; + snapshot!(ret, @r###" + { + "taskUid": 0, + "indexUid": "tamo", + "status": "enqueued", + "type": "indexCreation", + "enqueuedAt": "[date]" + } + "###); + server.wait_task(ret.uid()).await; + + let req = actix_web::test::TestRequest::delete().uri("/logs/stream"); + let req = req.to_request(); + let ret = actix_web::test::call_service(&*app, req).await; + let code = ret.status(); + snapshot!(code, @"204 No Content"); + + let logs = handle.await.unwrap(); + let logs = String::from_utf8(logs.to_vec()).unwrap(); + assert!(logs.contains("INFO"), "{logs}"); +} diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index d3e556ab3..85bc96d86 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -15,6 +15,7 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde { "vectorStore": true, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } 
"###); diff --git a/meilisearch/tests/tasks/webhook.rs b/meilisearch/tests/tasks/webhook.rs index 6979ff294..a18a93edb 100644 --- a/meilisearch/tests/tasks/webhook.rs +++ b/meilisearch/tests/tasks/webhook.rs @@ -44,10 +44,6 @@ struct WebhookHandle { } async fn create_webhook_server() -> WebhookHandle { - let mut log_builder = env_logger::Builder::new(); - log_builder.parse_filters("info"); - log_builder.init(); - let (sender, receiver) = mpsc::unbounded_channel(); let sender = Arc::new(sender); diff --git a/milli/Cargo.toml b/milli/Cargo.toml index dc2b992e0..4bc05d2cc 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -71,7 +71,6 @@ itertools = "0.11.0" puffin = "0.16.0" # logging -log = "0.4.20" logging_timer = "1.1.0" csv = "1.3.0" candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" } @@ -91,6 +90,7 @@ tiktoken-rs = "0.5.8" liquid = "0.26.4" arroy = "0.2.0" rand = "0.8.5" +tracing = "0.1.40" [dev-dependencies] mimalloc = { version = "0.1.39", default-features = false } @@ -102,15 +102,7 @@ meili-snap = { path = "../meili-snap" } rand = { version = "0.8.5", features = ["small_rng"] } [features] -all-tokenizations = [ - "charabia/chinese", - "charabia/hebrew", - "charabia/japanese", - "charabia/thai", - "charabia/korean", - "charabia/greek", - "charabia/khmer", -] +all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"] # Use POSIX semaphores instead of SysV semaphores in LMDB # For more information on this feature, see heed's Cargo.toml diff --git a/milli/src/documents/reader.rs b/milli/src/documents/reader.rs index a8a4c662d..d5eda69d4 100644 --- a/milli/src/documents/reader.rs +++ b/milli/src/documents/reader.rs @@ -25,6 +25,7 @@ impl DocumentsBatchReader { /// /// It first retrieves the index, then moves to the first document. Use the `into_cursor` /// method to iterator over the documents, from the first to the last. 
+ #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn from_reader(reader: R) -> Result { let reader = grenad::Reader::new(reader)?; let mut cursor = reader.into_cursor()?; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 7bac5ea0c..e411bd032 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -6,9 +6,9 @@ use charabia::Normalize; use fst::automaton::{Automaton, Str}; use fst::{IntoStreamer, Streamer}; use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; -use log::error; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; +use tracing::error; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index b439b87ec..e7bafaf70 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -166,6 +166,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( continue; } + let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id()); + let entered = span.enter(); + let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket( ctx, logger, @@ -175,6 +178,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( back!(); continue; }; + drop(entered); ranking_rule_scores.push(next_bucket.score); diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index a6c7ff2b1..6715939dc 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -14,6 +14,12 @@ impl<'t, 'i> ClearDocuments<'t, 'i> { ClearDocuments { wtxn, index } } + #[tracing::instrument( + level = "trace", + skip(self), + target = "indexing::documents", + name = "clear_documents" + )] pub fn execute(self) -> Result { puffin::profile_function!(); diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index ad8a838c8..400507c97 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -85,8 +85,8 @@ use charabia::normalizer::{Normalize, NormalizerOption}; use grenad::{CompressionType, SortAlgorithm}; use heed::types::{Bytes, DecodeIgnore, SerdeJson}; use heed::BytesEncode; -use log::debug; use time::OffsetDateTime; +use tracing::debug; use self::incremental::FacetsUpdateIncremental; use super::FacetsUpdateBulk; diff --git a/milli/src/update/index_documents/enrich.rs b/milli/src/update/index_documents/enrich.rs index 03eb3f4de..162136912 100644 --- a/milli/src/update/index_documents/enrich.rs +++ b/milli/src/update/index_documents/enrich.rs @@ -22,6 +22,7 @@ use crate::{FieldId, Index, Result}; /// # Panics /// /// - if reader.is_empty(), this function may panic in some cases +#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn enrich_documents_batch( rtxn: &heed::RoTxn, index: &Index, @@ -77,7 +78,7 @@ pub fn enrich_documents_batch( }, [] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)), [(field_id, name)] => { - log::info!("Primary key was not specified in index. Inferred to '{name}'"); + tracing::info!("Primary key was not specified in index. 
Inferred to '{name}'"); PrimaryKey::Flat { name, field_id: *field_id } } multiple => { @@ -143,6 +144,8 @@ pub fn enrich_documents_batch( /// Retrieve the document id after validating it, returning a `UserError` /// if the id is invalid or can't be guessed. +#[tracing::instrument(level = "trace", skip(uuid_buffer, documents_batch_index, document) +target = "indexing::documents")] fn fetch_or_generate_document_id( document: &obkv::KvReader, documents_batch_index: &DocumentsBatchIndex, diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index a6bbf939a..d568154b2 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -21,7 +21,7 @@ pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, R /// /// Returns the generated internal documents ids and a grenad reader /// with the list of extracted words from the given chunk of documents. -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_docid_word_positions( obkv_documents: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs index f860aacba..33def5abd 100644 --- a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs @@ -16,7 +16,7 @@ use crate::Result; /// /// Returns a grenad reader with the list of extracted facet numbers and /// documents ids from the given chunk of docid facet number positions. -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_facet_number_docids( fid_docid_facet_number: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index 2ade776c3..d14be7464 100644 --- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -15,7 +15,7 @@ use crate::{FieldId, Result}; /// /// Returns a grenad reader with the list of extracted facet strings and /// documents ids from the given chunk of docid facet string positions. -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_facet_string_docids( docid_fid_facet_string: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index b7de1e621..1f8af372d 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -39,7 +39,7 @@ pub struct ExtractedFacetValues { /// Returns the generated grenad reader containing the docid the fid and the orginal value as key /// and the normalized value as value extracted from the given chunk of documents. /// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially. 
-#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_fid_docid_facet_values( obkv_documents: grenad::Reader, indexer: GrenadParameters, @@ -431,7 +431,7 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues { if let Ok(float) = original.parse() { output_numbers.push(float); } else { - log::warn!( + tracing::warn!( "Internal error, could not parse a geofield that has been validated. Please open an issue." ) } diff --git a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs index 182d0c5d8..305af3630 100644 --- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs +++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs @@ -19,7 +19,7 @@ const MAX_COUNTED_WORDS: usize = 30; /// /// Returns a grenad reader with the list of extracted field id word counts /// and documents ids from the given chunk of docid word positions. -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_fid_word_count_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_geo_points.rs b/milli/src/update/index_documents/extract/extract_geo_points.rs index b3600e3bc..cfcc021c6 100644 --- a/milli/src/update/index_documents/extract/extract_geo_points.rs +++ b/milli/src/update/index_documents/extract/extract_geo_points.rs @@ -13,7 +13,7 @@ use crate::{FieldId, InternalError, Result}; /// Extracts the geographical coordinates contained in each document under the `_geo` field. /// /// Returns the generated grenad reader containing the docid as key associated to the (latitude, longitude) -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_geo_points( obkv_documents: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index 87181edc2..117f6cc8c 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -67,7 +67,7 @@ impl VectorStateDelta { /// Extracts the embedding vector contained in each document under the `_vectors` field. 
/// /// Returns the generated grenad reader containing the docid as key associated to the Vec -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_vector_points( obkv_documents: grenad::Reader, indexer: GrenadParameters, @@ -186,12 +186,12 @@ pub fn extract_vector_points( prompt.render(obkv, DelAdd::Deletion, field_id_map).unwrap_or_default(); let new_prompt = prompt.render(obkv, DelAdd::Addition, field_id_map)?; if old_prompt != new_prompt { - log::trace!( + tracing::trace!( "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}" ); VectorStateDelta::NowGenerated(new_prompt) } else { - log::trace!("⏭️ Prompt unmodified, skipping"); + tracing::trace!("⏭️ Prompt unmodified, skipping"); VectorStateDelta::NoChange } } else { diff --git a/milli/src/update/index_documents/extract/extract_word_docids.rs b/milli/src/update/index_documents/extract/extract_word_docids.rs index 66092821f..f38701dac 100644 --- a/milli/src/update/index_documents/extract/extract_word_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_docids.rs @@ -23,7 +23,7 @@ use crate::{DocumentId, FieldId, Result}; /// /// The first returned reader is the one for normal word_docids, and the second one is for /// exact_word_docids -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_word_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, @@ -135,6 +135,7 @@ pub fn extract_word_docids( )) } +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] fn words_into_sorter( document_id: DocumentId, fid: FieldId, diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs index d364b3e13..82a94ce00 100644 --- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs @@ -19,7 +19,7 @@ use crate::{DocumentId, Result}; /// /// Returns a grenad reader with the list of extracted word pairs proximities and /// documents ids from the given chunk of docid word positions. 
-#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_word_pair_proximity_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, @@ -59,6 +59,10 @@ pub fn extract_word_pair_proximity_docids( if current_document_id.map_or(false, |id| id != document_id) { puffin::profile_scope!("Document into sorter"); + // FIXME: span inside of a hot loop might degrade performance and create big reports + let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter"); + let _entered = span.enter(); + document_word_positions_into_sorter( current_document_id.unwrap(), &del_word_pair_proximity, @@ -138,6 +142,10 @@ pub fn extract_word_pair_proximity_docids( if let Some(document_id) = current_document_id { puffin::profile_scope!("Final document into sorter"); + // FIXME: span inside of a hot loop might degrade performance and create big reports + let span = tracing::trace_span!(target: "indexing::details", "final_document_into_sorter"); + let _entered = span.enter(); + document_word_positions_into_sorter( document_id, &del_word_pair_proximity, @@ -147,6 +155,10 @@ pub fn extract_word_pair_proximity_docids( } { puffin::profile_scope!("sorter_into_reader"); + // FIXME: span inside of a hot loop might degrade performance and create big reports + let span = tracing::trace_span!(target: "indexing::details", "sorter_into_reader"); + let _entered = span.enter(); + let mut writer = create_writer( indexer.chunk_compression_type, indexer.chunk_compression_level, diff --git a/milli/src/update/index_documents/extract/extract_word_position_docids.rs b/milli/src/update/index_documents/extract/extract_word_position_docids.rs index 89b77d140..4bc553d9a 100644 --- a/milli/src/update/index_documents/extract/extract_word_position_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_position_docids.rs @@ -18,7 +18,7 @@ use crate::{bucketed_position, DocumentId, Result}; /// /// Returns a grenad reader with the list of extracted words at positions and /// documents ids from the given chunk of docid word positions. -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_word_position_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, @@ -94,6 +94,7 @@ pub fn extract_word_position_docids( Ok(word_position_docids_reader) } +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] fn words_position_into_sorter( document_id: DocumentId, key_buffer: &mut Vec, diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 43e9a36ec..44f54ff26 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -14,8 +14,8 @@ use std::fs::File; use std::io::BufReader; use crossbeam_channel::Sender; -use log::debug; use rayon::prelude::*; +use tracing::debug; use self::extract_docid_word_positions::extract_docid_word_positions; use self::extract_facet_number_docids::extract_facet_number_docids; @@ -41,6 +41,7 @@ use crate::{FieldId, FieldsIdsMap, Result}; /// Extract data for each databases from obkv documents in parallel. /// Send data in grenad file over provided Sender. 
#[allow(clippy::too_many_arguments)] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub(crate) fn data_from_obkv_documents( original_obkv_chunks: impl Iterator>>> + Send, flattened_obkv_chunks: impl Iterator>>> + Send, @@ -113,7 +114,7 @@ pub(crate) fn data_from_obkv_documents( { let lmdb_writer_sx = lmdb_writer_sx.clone(); rayon::spawn(move || { - debug!("merge {} database", "facet-id-exists-docids"); + debug!(database = "facet-id-exists-docids", "merge"); match facet_exists_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) { Ok(reader) => { let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetExistsDocids(reader))); @@ -129,7 +130,7 @@ pub(crate) fn data_from_obkv_documents( { let lmdb_writer_sx = lmdb_writer_sx.clone(); rayon::spawn(move || { - debug!("merge {} database", "facet-id-is-null-docids"); + debug!(database = "facet-id-is-null-docids", "merge"); match facet_is_null_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) { Ok(reader) => { let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader))); @@ -145,7 +146,7 @@ pub(crate) fn data_from_obkv_documents( { let lmdb_writer_sx = lmdb_writer_sx.clone(); rayon::spawn(move || { - debug!("merge {} database", "facet-id-is-empty-docids"); + debug!(database = "facet-id-is-empty-docids", "merge"); match facet_is_empty_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) { Ok(reader) => { let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader))); @@ -257,13 +258,22 @@ fn spawn_extraction_task( M: MergeableReader + FromParallelIterator + Send + 'static, M::Output: Send, { + let current_span = tracing::Span::current(); + rayon::spawn(move || { - puffin::profile_scope!("extract_multiple_chunks", name); + let child_span = + tracing::trace_span!(target: "", parent: &current_span, "extract_multiple_chunks"); + let _entered = child_span.enter(); + puffin::profile_scope!("extract_multiple_chunksdexing::details, ", name); let chunks: Result = chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect(); + let current_span = tracing::Span::current(); + rayon::spawn(move || match chunks { Ok(chunks) => { - debug!("merge {} database", name); + let child_span = tracing::trace_span!(target: "", parent: &current_span, "merge_multiple_chunks"); + let _entered = child_span.enter(); + debug!(database = name, "merge"); puffin::profile_scope!("merge_multiple_chunks", name); let reader = chunks.merge(merge_fn, &indexer); let _ = lmdb_writer_sx.send(reader.map(serialize_fn)); diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/milli/src/update/index_documents/helpers/grenad_helpers.rs index 9af9bd019..3e63fcf77 100644 --- a/milli/src/update/index_documents/helpers/grenad_helpers.rs +++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs @@ -56,6 +56,7 @@ pub fn create_sorter( builder.build() } +#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")] pub fn sorter_into_reader( sorter: grenad::Sorter, indexer: GrenadParameters, @@ -252,6 +253,7 @@ pub fn grenad_obkv_into_chunks( /// Write provided sorter in database using serialize_value function. /// merge_values function is used if an entry already exist in the database.
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")] pub fn write_sorter_into_database( sorter: Sorter, database: &heed::Database, diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 83d2c08e1..36aa94964 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -13,11 +13,11 @@ use std::result::Result as StdResult; use crossbeam_channel::{Receiver, Sender}; use heed::types::Str; use heed::Database; -use log::debug; use rand::SeedableRng; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; use slice_group_by::GroupBy; +use tracing::debug_span; use typed_chunk::{write_typed_chunk_into_index, TypedChunk}; use self::enrich::enrich_documents_batch; @@ -134,6 +134,7 @@ where /// return an error and not the `IndexDocuments` struct as it is invalid to use it afterward. /// /// Returns the number of documents added to the builder. + #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn add_documents( mut self, reader: DocumentsBatchReader, @@ -179,6 +180,7 @@ where /// Remove a batch of documents from the current builder. /// /// Returns the number of documents deleted from the builder. + #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn remove_documents( mut self, to_delete: Vec, @@ -214,6 +216,7 @@ where /// - No batching using the standards `remove_documents` and `add_documents` took place /// /// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function. + #[tracing::instrument(level = "trace", skip_all, target = "indexing::details")] pub fn remove_documents_from_db_no_batch( mut self, to_delete: &RoaringBitmap, @@ -237,7 +240,12 @@ where Ok((self, deleted_documents)) } - #[logging_timer::time("IndexDocuments::{}")] + #[tracing::instrument( + level = "trace" + skip_all, + target = "indexing::documents", + name = "index_documents" + )] pub fn execute(mut self) -> Result { puffin::profile_function!(); @@ -273,7 +281,12 @@ where } /// Returns the total number of documents in the index after the update. - #[logging_timer::time("IndexDocuments::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "profile::indexing::details", + name = "index_documents_raw" + )] pub fn execute_raw(self, output: TransformOutput) -> Result where FP: Fn(UpdateIndexingStep) + Sync, @@ -374,8 +387,12 @@ where let cloned_embedder = self.embedders.clone(); + let current_span = tracing::Span::current(); + // Run extraction pipeline in parallel. 
pool.install(|| { + let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "extract_and_send_grenad_chunks"); + let _enter = child_span.enter(); puffin::profile_scope!("extract_and_send_grenad_chunks"); // split obkv file into several chunks let original_chunk_iter = @@ -489,10 +506,7 @@ where documents_seen: documents_seen_count as usize, total_documents: documents_count, }); - debug!( - "We have seen {} documents on {} total document so far", - documents_seen_count, documents_count - ); + debug_span!("Seen", documents = documents_seen_count, total = documents_count); } if is_merged_database { databases_seen += 1; @@ -543,7 +557,12 @@ where Ok(number_of_documents) } - #[logging_timer::time("IndexDocuments::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::prefix", + name = "index_documents_prefix_databases" + )] pub fn execute_prefix_databases( self, word_docids: Option>, @@ -598,6 +617,8 @@ where let del_prefix_fst_words; { + let span = tracing::trace_span!(target: "indexing::details", "compute_prefix_diffs"); + let _entered = span.enter(); puffin::profile_scope!("compute_prefix_diffs"); current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?; @@ -722,6 +743,12 @@ where /// Run the word prefix docids update operation. #[allow(clippy::too_many_arguments)] +#[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::prefix", + name = "index_documents_word_prefix_docids" +)] fn execute_word_prefix_docids( txn: &mut heed::RwTxn, reader: grenad::Reader>, diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index ab8e27edb..e5392092f 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -146,7 +146,7 @@ impl<'a, 'i> Transform<'a, 'i> { }) } - #[logging_timer::time] + #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn read_documents( &mut self, reader: EnrichedDocumentsBatchReader, @@ -359,7 +359,7 @@ impl<'a, 'i> Transform<'a, 'i> { /// - If the document to remove was inserted by the `read_documents` method before but was NOT present in the db, /// it's added into the grenad to ensure we don't insert it + removed from the list of new documents ids. /// - If the document to remove was not present in either the db or the transform we do nothing. - #[logging_timer::time] + #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn remove_documents( &mut self, mut to_remove: Vec, @@ -450,7 +450,7 @@ impl<'a, 'i> Transform<'a, 'i> { /// - No batching using the standards `remove_documents` and `add_documents` took place /// /// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function. - #[logging_timer::time] + #[tracing::instrument(level = "trace", skip_all, target = "indexing::details")] pub fn remove_documents_from_db_no_batch( &mut self, to_remove: &RoaringBitmap, @@ -541,6 +541,7 @@ impl<'a, 'i> Transform<'a, 'i> { // Flatten a document from the fields ids map contained in self and insert the new // created fields. Returns `None` if the document doesn't need to be flattened.
+ #[tracing::instrument(level = "trace", skip(self, obkv), target = "indexing::transform")] fn flatten_from_fields_ids_map(&mut self, obkv: KvReader) -> Result>> { if obkv .iter() @@ -661,7 +662,7 @@ impl<'a, 'i> Transform<'a, 'i> { /// Generate the `TransformOutput` based on the given sorter that can be generated from any /// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document /// id for the user side and the value must be an obkv where keys are valid fields ids. - #[logging_timer::time] + #[tracing::instrument(level = "trace", skip_all, target = "indexing::transform")] pub(crate) fn output_from_sorter( self, wtxn: &mut heed::RwTxn, diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 58cb9bb02..af828fee6 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -115,6 +115,7 @@ impl TypedChunk { /// Write typed chunk in the corresponding LMDB database of the provided index. /// Return new documents seen. +#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")] pub(crate) fn write_typed_chunk_into_index( typed_chunk: TypedChunk, index: &Index, @@ -126,6 +127,8 @@ pub(crate) fn write_typed_chunk_into_index( let mut is_merged_database = false; match typed_chunk { TypedChunk::Documents(obkv_documents_iter) => { + let span = tracing::trace_span!(target: "indexing::write_db", "documents"); + let _entered = span.enter(); let mut operations: Vec = Default::default(); let mut docids = index.documents_ids(wtxn)?; @@ -172,6 +175,9 @@ pub(crate) fn write_typed_chunk_into_index( index.put_documents_ids(wtxn, &docids)?; } TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_word_count_docids"); + let _entered = span.enter(); append_entries_into_database( fid_word_count_docids_iter, &index.field_id_word_count_docids, @@ -187,6 +193,8 @@ pub(crate) fn write_typed_chunk_into_index( exact_word_docids_reader, word_fid_docids_reader, } => { + let span = tracing::trace_span!(target: "indexing::write_db", "word_docids"); + let _entered = span.enter(); let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_reader) }?; append_entries_into_database( word_docids_iter.clone(), @@ -230,6 +238,8 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::WordPositionDocids(word_position_docids_iter) => { + let span = tracing::trace_span!(target: "indexing::write_db", "word_position_docids"); + let _entered = span.enter(); append_entries_into_database( word_position_docids_iter, &index.word_position_docids, @@ -241,16 +251,25 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => { + let span = + tracing::trace_span!(target: "indexing::write_db","field_id_facet_number_docids"); + let _entered = span.enter(); let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter); indexer.execute(wtxn)?; is_merged_database = true; } TypedChunk::FieldIdFacetStringDocids(facet_id_string_docids_iter) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_facet_string_docids"); + let _entered = span.enter(); let indexer = FacetsUpdate::new(index, FacetType::String, facet_id_string_docids_iter); indexer.execute(wtxn)?; is_merged_database = true; } 
TypedChunk::FieldIdFacetExistsDocids(facet_id_exists_docids) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_facet_exists_docids"); + let _entered = span.enter(); append_entries_into_database( facet_id_exists_docids, &index.facet_id_exists_docids, @@ -262,6 +281,9 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::FieldIdFacetIsNullDocids(facet_id_is_null_docids) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_null_docids"); + let _entered = span.enter(); append_entries_into_database( facet_id_is_null_docids, &index.facet_id_is_null_docids, @@ -273,6 +295,8 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => { + let span = tracing::trace_span!(target: "profile::indexing::write_db", "field_id_facet_is_empty_docids"); + let _entered = span.enter(); append_entries_into_database( facet_id_is_empty_docids, &index.facet_id_is_empty_docids, @@ -284,6 +308,9 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "word_pair_proximity_docids"); + let _entered = span.enter(); append_entries_into_database( word_pair_proximity_docids_iter, &index.word_pair_proximity_docids, @@ -295,6 +322,9 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::FieldIdDocidFacetNumbers(fid_docid_facet_number) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_numbers"); + let _entered = span.enter(); let index_fid_docid_facet_numbers = index.field_id_docid_facet_f64s.remap_types::(); let mut cursor = fid_docid_facet_number.into_cursor()?; @@ -315,6 +345,9 @@ pub(crate) fn write_typed_chunk_into_index( } } TypedChunk::FieldIdDocidFacetStrings(fid_docid_facet_string) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_strings"); + let _entered = span.enter(); let index_fid_docid_facet_strings = index.field_id_docid_facet_strings.remap_types::(); let mut cursor = fid_docid_facet_string.into_cursor()?; @@ -335,6 +368,8 @@ pub(crate) fn write_typed_chunk_into_index( } } TypedChunk::GeoPoints(geo_points) => { + let span = tracing::trace_span!(target: "indexing::write_db", "geo_points"); + let _entered = span.enter(); let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default(); let mut geo_faceted_docids = index.geo_faceted_documents_ids(wtxn)?; @@ -365,6 +400,8 @@ pub(crate) fn write_typed_chunk_into_index( expected_dimension, embedder_name, } => { + let span = tracing::trace_span!(target: "indexing::write_db", "vector_points"); + let _entered = span.enter(); let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; @@ -480,9 +517,11 @@ pub(crate) fn write_typed_chunk_into_index( } } - log::debug!("Finished vector chunk for {}", embedder_name); + tracing::debug!("Finished vector chunk for {}", embedder_name); } TypedChunk::ScriptLanguageDocids(sl_map) => { + let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids"); + let _entered = span.enter(); for (key, (deletion, addition)) in sl_map { let mut db_key_exists = false; let final_value = match index.script_language_docids.get(wtxn, &key)? 
{ @@ -536,6 +575,7 @@ fn merge_word_docids_reader_into_fst( /// Write provided entries in database using serialize_value function. /// merge_values function is used if an entry already exist in the database. +#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")] fn write_entries_into_database( data: grenad::Reader, database: &heed::Database, @@ -582,6 +622,7 @@ where /// merge_values function is used if an entry already exist in the database. /// All provided entries must be ordered. /// If the index is not empty, write_entries_into_database is called instead. +#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")] fn append_entries_into_database( data: grenad::Reader, database: &heed::Database, diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index a3ba42119..3cad79467 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -372,6 +372,11 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.embedder_settings = Setting::Reset; } + #[tracing::instrument( + level = "trace", + skip(self, progress_callback, should_abort, old_fields_ids_map), + target = "indexing::documents" + )] fn reindex( &mut self, progress_callback: &FP, diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs index 544bea224..99c6c815e 100644 --- a/milli/src/update/word_prefix_docids.rs +++ b/milli/src/update/word_prefix_docids.rs @@ -39,7 +39,12 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> { } } - #[logging_timer::time("WordPrefixDocids::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::prefix", + name = "word_prefix_docids" + )] pub fn execute( self, mut new_word_docids_iter: grenad::ReaderCursor, diff --git a/milli/src/update/words_prefix_integer_docids.rs b/milli/src/update/words_prefix_integer_docids.rs index 819cc097b..a05eb8721 100644 --- a/milli/src/update/words_prefix_integer_docids.rs +++ b/milli/src/update/words_prefix_integer_docids.rs @@ -4,7 +4,7 @@ use std::str; use grenad::CompressionType; use heed::types::Bytes; use heed::{BytesDecode, BytesEncode, Database}; -use log::debug; +use tracing::debug; use crate::error::SerializationError; use crate::heed_codec::StrBEU16Codec; @@ -44,7 +44,12 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> { } } - #[logging_timer::time("WordPrefixIntegerDocids::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::prefix", + name = "words_prefix_integer_docids" + )] pub fn execute( self, new_word_integer_docids: grenad::Reader, diff --git a/milli/src/update/words_prefixes_fst.rs b/milli/src/update/words_prefixes_fst.rs index f26bf93e5..bb1830727 100644 --- a/milli/src/update/words_prefixes_fst.rs +++ b/milli/src/update/words_prefixes_fst.rs @@ -38,7 +38,12 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> { self } - #[logging_timer::time("WordsPrefixesFst::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::prefix", + name = "words_prefix_fst" + )] pub fn execute(self) -> Result<()> { puffin::profile_function!(); diff --git a/milli/src/vector/hf.rs b/milli/src/vector/hf.rs index cdfdbfb75..04e169c71 100644 --- a/milli/src/vector/hf.rs +++ b/milli/src/vector/hf.rs @@ -73,7 +73,7 @@ impl Embedder { let device = match candle_core::Device::cuda_if_available(0) { Ok(device) => device, Err(error) => { - log::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error); + tracing::warn!("could not initialize CUDA device for Hugging Face 
embedder, defaulting to CPU: {}", error); candle_core::Device::Cpu } }; diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs index 104decb66..cbddddfb7 100644 --- a/milli/src/vector/openai.rs +++ b/milli/src/vector/openai.rs @@ -173,12 +173,16 @@ impl Embedder { let retry_duration = match result { Ok(embeddings) => return Ok(embeddings), Err(retry) => { - log::warn!("Failed: {}", retry.error); + tracing::warn!("Failed: {}", retry.error); tokenized |= retry.must_tokenize(); retry.into_duration(attempt) } }?; - log::warn!("Attempt #{}, retrying after {}ms.", attempt, retry_duration.as_millis()); + tracing::warn!( + "Attempt #{}, retrying after {}ms.", + attempt, + retry_duration.as_millis() + ); tokio::time::sleep(retry_duration).await; } @@ -244,7 +248,7 @@ impl Embedder { .map_err(EmbedError::openai_unexpected) .map_err(Retry::retry_later)?; - log::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt."); + tracing::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt."); return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens( error_response.error, @@ -266,7 +270,7 @@ impl Embedder { client: &reqwest::Client, ) -> Result>, Retry> { for text in texts { - log::trace!("Received prompt: {}", text.as_ref()) + tracing::trace!("Received prompt: {}", text.as_ref()) } let request = OpenAiRequest { model: self.options.embedding_model.name(), @@ -289,7 +293,7 @@ impl Embedder { .map_err(EmbedError::openai_unexpected) .map_err(Retry::retry_later)?; - log::trace!("response: {:?}", response.data); + tracing::trace!("response: {:?}", response.data); Ok(response .data diff --git a/tracing-trace/Cargo.toml b/tracing-trace/Cargo.toml new file mode 100644 index 000000000..64848bff9 --- /dev/null +++ b/tracing-trace/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "tracing-trace" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +color-spantrace = "0.2.1" +fxprof-processed-profile = "0.6.0" +serde = { version = "1.0.195", features = ["derive"] } +serde_json = "1.0.111" +tracing = "0.1.40" +tracing-error = "0.2.0" +tracing-subscriber = "0.3.18" +byte-unit = { version = "4.0.19", default-features = false, features = [ + "std", + "serde", +] } +tokio = { version = "1.35.1", features = ["sync"] } + +[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies] +libproc = "0.14.2" diff --git a/tracing-trace/src/bin/trace-to-callstats.rs b/tracing-trace/src/bin/trace-to-callstats.rs new file mode 100644 index 000000000..631cdbb45 --- /dev/null +++ b/tracing-trace/src/bin/trace-to-callstats.rs @@ -0,0 +1,21 @@ +use std::ffi::OsString; +use std::io::Write; + +use serde_json::json; + +fn main() { + let input_file = std::env::args_os().nth(1).expect("missing file"); + let input = + std::io::BufReader::new(std::fs::File::open(&input_file).expect("could not open ")); + let trace = tracing_trace::TraceReader::new(input); + let profile = tracing_trace::processor::span_stats::to_call_stats(trace).unwrap(); + let mut output_file = OsString::new(); + output_file.push("callstats-"); + output_file.push(input_file); + let mut output_file = std::io::BufWriter::new(std::fs::File::create(output_file).unwrap()); + for (key, value) in profile { + serde_json::to_writer(&mut output_file, &json!({key: value})).unwrap(); + writeln!(&mut output_file).unwrap(); + } + 
output_file.flush().unwrap(); +} diff --git a/tracing-trace/src/bin/trace-to-firefox.rs b/tracing-trace/src/bin/trace-to-firefox.rs new file mode 100644 index 000000000..21adff41d --- /dev/null +++ b/tracing-trace/src/bin/trace-to-firefox.rs @@ -0,0 +1,18 @@ +use std::ffi::OsString; +use std::io::Write; + +fn main() { + let input_file = std::env::args_os().nth(1).expect("missing file"); + let input = + std::io::BufReader::new(std::fs::File::open(&input_file).expect("could not open ")); + let trace = tracing_trace::TraceReader::new(input); + let profile = + tracing_trace::processor::firefox_profiler::to_firefox_profile(trace, "Meilisearch") + .unwrap(); + let mut output_file = OsString::new(); + output_file.push("firefox-"); + output_file.push(input_file); + let mut output_file = std::io::BufWriter::new(std::fs::File::create(output_file).unwrap()); + serde_json::to_writer(&mut output_file, &profile).unwrap(); + output_file.flush().unwrap(); +} diff --git a/tracing-trace/src/entry.rs b/tracing-trace/src/entry.rs new file mode 100644 index 000000000..26e543ba0 --- /dev/null +++ b/tracing-trace/src/entry.rs @@ -0,0 +1,141 @@ +use std::borrow::Cow; + +use serde::{Deserialize, Serialize}; +use tracing::span::Id as TracingId; + +#[derive(Debug, Serialize, Deserialize)] +pub enum Entry { + /// A code location was accessed for the first time + NewCallsite(NewCallsite), + + /// A new thread was accessed + NewThread(NewThread), + + /// A new call started + NewSpan(NewSpan), + + /// An already in-flight call started doing work. + /// + /// For synchronous functions, open should always be followed immediately by enter, exit and close, + /// but for asynchronous functions, work can suspend (exiting the span without closing it), and then + /// later resume (entering the span again without opening it). + /// + /// The timer for a span only starts when the span is entered. + SpanEnter(SpanEnter), + + /// An in-flight call suspended and paused work. + /// + /// For synchronous functions, exit should always be followed immediately by close, + /// but for asynchronous functions, work can suspend and then later resume. + /// + /// The timer for a span pauses when the span is exited. 
+ SpanExit(SpanExit), + + /// A call ended + SpanClose(SpanClose), + + /// An event occurred + Event(Event), +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct SpanId(u64); + +impl From<&TracingId> for SpanId { + fn from(value: &TracingId) -> Self { + Self(value.into_u64()) + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct NewCallsite { + pub call_id: ResourceId, + pub name: Cow<'static, str>, + pub module_path: Option>, + pub file: Option>, + pub line: Option, + pub target: Cow<'static, str>, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct NewThread { + pub thread_id: ResourceId, + pub name: Option, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct SpanEnter { + pub id: SpanId, + pub time: std::time::Duration, + pub memory: Option, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct SpanExit { + pub id: SpanId, + pub time: std::time::Duration, + pub memory: Option, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct Event { + pub call_id: ResourceId, + pub thread_id: ResourceId, + pub parent_id: Option, + pub time: std::time::Duration, + pub memory: Option, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct NewSpan { + pub id: SpanId, + pub call_id: ResourceId, + pub parent_id: Option, + pub thread_id: ResourceId, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct SpanClose { + pub id: SpanId, + pub time: std::time::Duration, +} + +/// A struct with a memory allocation stat. +#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)] +pub struct MemoryStats { + /// Resident set size, measured in bytes. + /// (same as VmRSS in /proc//status). + pub resident: u64, +} + +impl MemoryStats { + #[cfg(any(target_os = "linux", target_os = "macos"))] + pub fn fetch() -> Option { + use libproc::libproc::pid_rusage::{pidrusage, RUsageInfoV0}; + + match pidrusage(std::process::id() as i32) { + Ok(RUsageInfoV0 { ri_resident_size, .. }) => { + Some(MemoryStats { resident: ri_resident_size }) + } + Err(_) => None, /* ignoring error to avoid spamming */ + } + } + + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + pub fn fetch() -> Option { + None + } + + pub fn checked_sub(self, other: Self) -> Option { + Some(Self { resident: self.resident.checked_sub(other.resident)? 
}) + } +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct ResourceId(pub(crate) usize); + +impl ResourceId { + pub fn to_usize(self) -> usize { + self.0 + } +} diff --git a/tracing-trace/src/error.rs b/tracing-trace/src/error.rs new file mode 100644 index 000000000..831da1e9d --- /dev/null +++ b/tracing-trace/src/error.rs @@ -0,0 +1,21 @@ +#[derive(Debug)] +pub enum Error { + Json(serde_json::Error), +} + +impl std::error::Error for Error {} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("error de/serializing trace entry:")?; + match self { + Error::Json(error) => std::fmt::Display::fmt(&error, f), + } + } +} + +impl From for Error { + fn from(value: serde_json::Error) -> Self { + Self::Json(value) + } +} diff --git a/tracing-trace/src/layer.rs b/tracing-trace/src/layer.rs new file mode 100644 index 000000000..f2f1d64ae --- /dev/null +++ b/tracing-trace/src/layer.rs @@ -0,0 +1,208 @@ +use std::borrow::Cow; +use std::collections::HashMap; +use std::io::Write; +use std::ops::ControlFlow; +use std::sync::RwLock; + +use tracing::span::{Attributes, Id as TracingId}; +use tracing::{Metadata, Subscriber}; +use tracing_subscriber::layer::Context; +use tracing_subscriber::Layer; + +use crate::entry::{ + Entry, Event, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter, + SpanExit, SpanId, +}; +use crate::{Error, Trace, TraceWriter}; + +/// Layer that measures the time spent in spans. +pub struct TraceLayer { + sender: tokio::sync::mpsc::UnboundedSender, + callsites: RwLock>, + start_time: std::time::Instant, + profile_memory: bool, +} + +impl Trace { + pub fn new(profile_memory: bool) -> (Self, TraceLayer) { + let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); + let trace = Trace { receiver }; + let layer = TraceLayer { + sender, + callsites: Default::default(), + start_time: std::time::Instant::now(), + profile_memory, + }; + (trace, layer) + } +} + +impl TraceWriter { + pub fn new(writer: W, profile_memory: bool) -> (Self, TraceLayer) { + let (trace, layer) = Trace::new(profile_memory); + (trace.into_writer(writer), layer) + } + + pub async fn receive(&mut self) -> Result, Error> { + let Some(entry) = self.receiver.recv().await else { + return Ok(ControlFlow::Break(())); + }; + self.write(entry)?; + Ok(ControlFlow::Continue(())) + } + + /// Panics if called from an asynchronous context + pub fn blocking_receive(&mut self) -> Result, Error> { + let Some(entry) = self.receiver.blocking_recv() else { + return Ok(ControlFlow::Break(())); + }; + self.write(entry)?; + Ok(ControlFlow::Continue(())) + } + + pub fn write(&mut self, entry: Entry) -> Result<(), Error> { + Ok(serde_json::ser::to_writer(&mut self.writer, &entry)?) 
+ } + + pub fn try_receive(&mut self) -> Result, Error> { + let Ok(entry) = self.receiver.try_recv() else { + return Ok(ControlFlow::Break(())); + }; + self.write(entry)?; + Ok(ControlFlow::Continue(())) + } + + pub fn flush(&mut self) -> Result<(), std::io::Error> { + self.writer.flush() + } +} + +#[derive(PartialEq, Eq, Hash)] +enum OpaqueIdentifier { + Thread(std::thread::ThreadId), + Call(tracing::callsite::Identifier), +} + +impl TraceLayer { + fn resource_id(&self, opaque: OpaqueIdentifier) -> Option { + self.callsites.read().unwrap().get(&opaque).copied() + } + + fn register_resource_id(&self, opaque: OpaqueIdentifier) -> ResourceId { + let mut map = self.callsites.write().unwrap(); + let len = map.len(); + *map.entry(opaque).or_insert(ResourceId(len)) + } + + fn elapsed(&self) -> std::time::Duration { + self.start_time.elapsed() + } + + fn memory_stats(&self) -> Option { + if self.profile_memory { + MemoryStats::fetch() + } else { + None + } + } + + fn send(&self, entry: Entry) { + // we never care that the other end hanged on us + let _ = self.sender.send(entry); + } + + fn register_callsite(&self, metadata: &'static Metadata<'static>) -> ResourceId { + let call_id = self.register_resource_id(OpaqueIdentifier::Call(metadata.callsite())); + + let module_path = metadata.module_path(); + let file = metadata.file(); + let line = metadata.line(); + let name = metadata.name(); + let target = metadata.target(); + + self.send(Entry::NewCallsite(NewCallsite { + call_id, + module_path: module_path.map(Cow::Borrowed), + file: file.map(Cow::Borrowed), + line, + name: Cow::Borrowed(name), + target: Cow::Borrowed(target), + })); + call_id + } + + fn register_thread(&self) -> ResourceId { + let thread_id = std::thread::current().id(); + let name = std::thread::current().name().map(ToOwned::to_owned); + let thread_id = self.register_resource_id(OpaqueIdentifier::Thread(thread_id)); + self.send(Entry::NewThread(NewThread { thread_id, name })); + thread_id + } +} + +impl Layer for TraceLayer +where + S: Subscriber, +{ + fn on_new_span(&self, attrs: &Attributes<'_>, id: &TracingId, _ctx: Context<'_, S>) { + let call_id = self + .resource_id(OpaqueIdentifier::Call(attrs.metadata().callsite())) + .unwrap_or_else(|| self.register_callsite(attrs.metadata())); + + let thread_id = self + .resource_id(OpaqueIdentifier::Thread(std::thread::current().id())) + .unwrap_or_else(|| self.register_thread()); + + let parent_id = attrs + .parent() + .cloned() + .or_else(|| tracing::Span::current().id()) + .map(|id| SpanId::from(&id)); + + self.send(Entry::NewSpan(NewSpan { id: id.into(), call_id, parent_id, thread_id })); + } + + fn on_enter(&self, id: &TracingId, _ctx: Context<'_, S>) { + self.send(Entry::SpanEnter(SpanEnter { + id: id.into(), + time: self.elapsed(), + memory: self.memory_stats(), + })) + } + + fn on_exit(&self, id: &TracingId, _ctx: Context<'_, S>) { + self.send(Entry::SpanExit(SpanExit { + id: id.into(), + time: self.elapsed(), + memory: self.memory_stats(), + })) + } + + fn on_event(&self, event: &tracing::Event<'_>, _ctx: Context<'_, S>) { + let call_id = self + .resource_id(OpaqueIdentifier::Call(event.metadata().callsite())) + .unwrap_or_else(|| self.register_callsite(event.metadata())); + + let thread_id = self + .resource_id(OpaqueIdentifier::Thread(std::thread::current().id())) + .unwrap_or_else(|| self.register_thread()); + + let parent_id = event + .parent() + .cloned() + .or_else(|| tracing::Span::current().id()) + .map(|id| SpanId::from(&id)); + + self.send(Entry::Event(Event { + 
call_id, + thread_id, + parent_id, + time: self.elapsed(), + memory: self.memory_stats(), + })) + } + + fn on_close(&self, id: TracingId, _ctx: Context<'_, S>) { + self.send(Entry::SpanClose(SpanClose { id: Into::into(&id), time: self.elapsed() })) + } +} diff --git a/tracing-trace/src/lib.rs b/tracing-trace/src/lib.rs new file mode 100644 index 000000000..77eb61d47 --- /dev/null +++ b/tracing-trace/src/lib.rs @@ -0,0 +1,54 @@ +use std::io::{Read, Write}; + +use entry::Entry; + +pub mod entry; +mod error; +pub mod layer; +pub mod processor; + +pub use error::Error; + +pub struct TraceWriter { + writer: W, + receiver: tokio::sync::mpsc::UnboundedReceiver, +} + +pub struct Trace { + receiver: tokio::sync::mpsc::UnboundedReceiver, +} + +impl Trace { + pub fn into_receiver(self) -> tokio::sync::mpsc::UnboundedReceiver { + self.receiver + } + + pub fn into_writer(self, writer: W) -> TraceWriter { + TraceWriter { writer, receiver: self.receiver } + } +} + +pub struct TraceReader { + reader: R, +} + +impl TraceReader { + pub fn new(reader: R) -> Self { + Self { reader } + } + + fn read(&mut self) -> Option> { + serde_json::Deserializer::from_reader(&mut self.reader) + .into_iter() + .next() + .map(|res| res.map_err(Into::into)) + } +} + +impl Iterator for TraceReader { + type Item = Result; + + fn next(&mut self) -> Option { + self.read() + } +} diff --git a/tracing-trace/src/main.rs b/tracing-trace/src/main.rs new file mode 100644 index 000000000..c2e4f08a7 --- /dev/null +++ b/tracing-trace/src/main.rs @@ -0,0 +1,122 @@ +use tracing::{instrument, Span}; +use tracing_error::{ErrorLayer, InstrumentResult, SpanTrace, TracedError}; + +#[instrument(level = "trace", target = "profile::indexing")] +fn foo() -> Result<(), TracedError> { + let _ = bar(40, 2); + bar(40, 2) +} + +#[derive(Debug)] +pub enum Error { + XTooBig, +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("x too big") + } +} + +impl std::error::Error for Error {} + +#[instrument(level = "trace", target = "profile::indexing")] +fn bar(x: u32, y: u32) -> Result<(), TracedError> { + let handle_ok = spawn_in_current_scope(move || baz(y)); + let handle = spawn_in_current_scope(move || baz(x + y)); + handle_ok.join().unwrap().and(handle.join().unwrap()) +} + +pub fn spawn_in_current_scope(f: F) -> std::thread::JoinHandle +where + F: FnOnce() -> T + Send + 'static, + T: Send + 'static, +{ + let current = Span::current(); + std::thread::spawn(move || { + let span = tracing::trace_span!(parent: &current, "thread_spawn", id = ?std::thread::current().id(), name = tracing::field::Empty); + if let Some(name) = std::thread::current().name() { + span.record("name", name); + } + span.in_scope(f) + }) +} + +#[instrument(level = "trace", target = "profile::indexing")] +fn baz(x: u32) -> Result<(), TracedError> { + if x > 10 { + fibo_recursive(10); + return Err(Error::XTooBig).in_current_span(); + } + Ok(()) +} + +#[instrument(level = "trace", target = "profile::indexing")] +fn fibo_recursive(n: u32) -> u32 { + if n == 0 { + return 1; + } + if n == 1 { + return 2; + } + return fibo_recursive(n - 1) - fibo_recursive(n - 2); +} + +use tracing_error::ExtractSpanTrace as _; +use tracing_subscriber::layer::SubscriberExt as _; +use tracing_trace::processor; + +fn on_panic(info: &std::panic::PanicInfo) { + let info = info.to_string(); + let trace = SpanTrace::capture(); + tracing::error!(%info, %trace); +} + +fn main() { + let (mut trace, profiling_layer) = 
tracing_trace::TraceWriter::new(std::fs::File::create("trace.json").unwrap(), true); + + let subscriber = tracing_subscriber::registry() + // any number of other subscriber layers may be added before or + // after the `ErrorLayer`... + .with(ErrorLayer::default()) + .with(profiling_layer); + + // set the subscriber as the default for the application + tracing::subscriber::set_global_default(subscriber).unwrap(); + + std::panic::set_hook(Box::new(on_panic)); + + let res = foo(); + + if let Err(error) = res { + print_extracted_spantraces(&error) + } + + while trace.try_receive().unwrap().is_continue() {} + + trace.flush().unwrap(); + + let trace = tracing_trace::TraceReader::new(std::fs::File::open("trace.json").unwrap()); + + let profile = processor::firefox_profiler::to_firefox_profile(trace, "test").unwrap(); + serde_json::to_writer(std::fs::File::create("processed.json").unwrap(), &profile).unwrap(); +} + +fn print_extracted_spantraces(error: &(dyn std::error::Error + 'static)) { + let mut error = Some(error); + let mut ind = 0; + + eprintln!("Error:"); + + while let Some(err) = error { + if let Some(spantrace) = err.span_trace() { + eprintln!("found a spantrace:\n{}", color_spantrace::colorize(spantrace)); + } else { + eprintln!("{:>4}: {}", ind, err); + } + + error = err.source(); + ind += 1; + } +} diff --git a/tracing-trace/src/processor/firefox_profiler.rs b/tracing-trace/src/processor/firefox_profiler.rs new file mode 100644 index 000000000..bae8ea44a --- /dev/null +++ b/tracing-trace/src/processor/firefox_profiler.rs @@ -0,0 +1,450 @@ +use std::collections::HashMap; + +use fxprof_processed_profile::{ + CategoryPairHandle, CounterHandle, CpuDelta, Frame, FrameFlags, FrameInfo, MarkerDynamicField, + MarkerFieldFormat, MarkerLocation, MarkerSchema, MarkerSchemaField, ProcessHandle, Profile, + ProfilerMarker, ReferenceTimestamp, SamplingInterval, StringHandle, Timestamp, +}; +use serde_json::json; + +use crate::entry::{ + Entry, Event, MemoryStats, NewCallsite, NewSpan, ResourceId, SpanClose, SpanEnter, SpanExit, + SpanId, +}; +use crate::{Error, TraceReader}; + +pub fn to_firefox_profile( + trace: TraceReader, + app: &str, +) -> Result { + let mut profile = Profile::new( + app, + ReferenceTimestamp::from_millis_since_unix_epoch(0.0), + SamplingInterval::from_nanos(15), + ); + + let mut last_timestamp = Timestamp::from_nanos_since_reference(0); + let main = profile.add_process(app, 0, last_timestamp); + + let mut calls = HashMap::new(); + let mut threads = HashMap::new(); + let mut spans = HashMap::new(); + + let category = profile.add_category("general", fxprof_processed_profile::CategoryColor::Blue); + let subcategory = profile.add_subcategory(category, "subcategory"); + + let mut last_memory = MemoryStats::default(); + + let mut memory_counters = None; + + for entry in trace { + let entry = entry?; + match entry { + Entry::NewCallsite(callsite) => { + let string_handle = profile.intern_string(callsite.name.as_ref()); + calls.insert(callsite.call_id, (callsite, string_handle)); + } + Entry::NewThread(thread) => { + let thread_handle = profile.add_thread( + main, + thread.thread_id.to_usize() as u32, + last_timestamp, + threads.is_empty(), + ); + if let Some(name) = &thread.name { + profile.set_thread_name(thread_handle, name) + } + threads.insert(thread.thread_id, thread_handle); + } + Entry::NewSpan(span) => { + spans.insert(span.id, (span, SpanStatus::Outside)); + } + Entry::SpanEnter(SpanEnter { id, time, memory }) => { + let (_span, status) = spans.get_mut(&id).unwrap(); + + let 
SpanStatus::Outside = status else { + continue; + }; + + *status = SpanStatus::Inside { time, memory }; + + last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64); + + add_memory_samples( + &mut profile, + main, + memory, + last_timestamp, + &mut memory_counters, + &mut last_memory, + ); + } + Entry::SpanExit(SpanExit { id, time, memory }) => { + let (span, status) = spans.get_mut(&id).unwrap(); + + let SpanStatus::Inside { time: begin, memory: begin_memory } = status else { + continue; + }; + last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64); + + let begin = *begin; + let begin_memory = *begin_memory; + + *status = SpanStatus::Outside; + + let span = *span; + let thread_handle = threads.get(&span.thread_id).unwrap(); + + let frames = make_frames(span, &spans, &calls, subcategory); + + profile.add_sample( + *thread_handle, + to_timestamp(begin), + frames.iter().rev().cloned(), + CpuDelta::ZERO, + 1, + ); + profile.add_sample( + *thread_handle, + to_timestamp(time), + frames.iter().rev().cloned(), + CpuDelta::from_nanos((time - begin).as_nanos() as u64), + 1, + ); + + add_memory_samples( + &mut profile, + main, + memory, + last_timestamp, + &mut memory_counters, + &mut last_memory, + ); + + let (callsite, _) = calls.get(&span.call_id).unwrap(); + + let memory_delta = + begin_memory.zip(memory).and_then(|(begin, end)| end.checked_sub(begin)); + let marker = SpanMarker { callsite, span: &span, memory_delta }; + + profile.add_marker_with_stack( + *thread_handle, + &callsite.name, + marker, + fxprof_processed_profile::MarkerTiming::Interval( + to_timestamp(begin), + to_timestamp(time), + ), + frames.iter().rev().cloned(), + ) + } + Entry::Event(event) => { + let span = event + .parent_id + .as_ref() + .and_then(|parent_id| spans.get(parent_id)) + .and_then(|(span, status)| match status { + SpanStatus::Outside => None, + SpanStatus::Inside { .. 
} => Some(span), + }) + .copied(); + let timestamp = to_timestamp(event.time); + + let thread_handle = threads.get(&event.thread_id).unwrap(); + + let frames = span + .map(|span| make_frames(span, &spans, &calls, subcategory)) + .unwrap_or_default(); + + profile.add_sample( + *thread_handle, + timestamp, + frames.iter().rev().cloned(), + CpuDelta::ZERO, + 1, + ); + + let memory_delta = add_memory_samples( + &mut profile, + main, + event.memory, + last_timestamp, + &mut memory_counters, + &mut last_memory, + ); + + let (callsite, _) = calls.get(&event.call_id).unwrap(); + + let marker = EventMarker { callsite, event: &event, memory_delta }; + + profile.add_marker_with_stack( + *thread_handle, + &callsite.name, + marker, + fxprof_processed_profile::MarkerTiming::Instant(timestamp), + frames.iter().rev().cloned(), + ); + + last_timestamp = timestamp; + } + Entry::SpanClose(SpanClose { id, time }) => { + spans.remove(&id); + last_timestamp = to_timestamp(time); + } + } + } + + Ok(profile) +} + +struct MemoryCounterHandles { + usage: CounterHandle, +} + +impl MemoryCounterHandles { + fn new(profile: &mut Profile, main: ProcessHandle) -> Self { + let usage = + profile.add_counter(main, "mimmalloc", "Memory", "Amount of memory currently in use"); + Self { usage } + } +} + +fn add_memory_samples( + profile: &mut Profile, + main: ProcessHandle, + memory: Option, + last_timestamp: Timestamp, + memory_counters: &mut Option, + last_memory: &mut MemoryStats, +) -> Option { + let Some(stats) = memory else { + return None; + }; + + let memory_counters = + memory_counters.get_or_insert_with(|| MemoryCounterHandles::new(profile, main)); + + profile.add_counter_sample( + memory_counters.usage, + last_timestamp, + stats.resident as f64 - last_memory.resident as f64, + 0, + ); + + let delta = stats.checked_sub(*last_memory); + *last_memory = stats; + delta +} + +fn to_timestamp(time: std::time::Duration) -> Timestamp { + Timestamp::from_nanos_since_reference(time.as_nanos() as u64) +} + +fn make_frames( + span: NewSpan, + spans: &HashMap, + calls: &HashMap, + subcategory: CategoryPairHandle, +) -> Vec { + let mut frames = Vec::new(); + let mut current_span = span; + loop { + let frame = make_frame(current_span, calls, subcategory); + frames.push(frame); + if let Some(parent) = current_span.parent_id { + current_span = spans.get(&parent).unwrap().0; + } else { + break; + } + } + frames +} + +fn make_frame( + span: NewSpan, + calls: &HashMap, + subcategory: CategoryPairHandle, +) -> FrameInfo { + let (_, call) = calls.get(&span.call_id).unwrap(); + FrameInfo { frame: Frame::Label(*call), category_pair: subcategory, flags: FrameFlags::empty() } +} + +#[derive(Debug, Clone, Copy)] +enum SpanStatus { + Outside, + Inside { time: std::time::Duration, memory: Option }, +} + +struct SpanMarker<'a> { + span: &'a NewSpan, + callsite: &'a NewCallsite, + memory_delta: Option, +} + +impl<'a> ProfilerMarker for SpanMarker<'a> { + const MARKER_TYPE_NAME: &'static str = "span"; + + fn schema() -> MarkerSchema { + let fields = vec![ + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "filename", + label: "File name", + format: MarkerFieldFormat::FilePath, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "line", + label: "Line", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "module_path", + label: "Module path", + format: MarkerFieldFormat::String, + searchable: true, + }), + 
MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "span_id", + label: "Span ID", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "thread_id", + label: "Thread ID", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "resident", + label: "Resident set size, measured in bytes while this function was executing", + format: MarkerFieldFormat::Bytes, + searchable: false, + }), + ]; + + MarkerSchema { + type_name: Self::MARKER_TYPE_NAME, + locations: vec![ + MarkerLocation::MarkerTable, + MarkerLocation::MarkerChart, + MarkerLocation::TimelineOverview, + ], + chart_label: None, + tooltip_label: Some("{marker.name} - {marker.data.filename}:{marker.data.line}"), + table_label: Some("{marker.data.filename}:{marker.data.line}"), + fields, + } + } + + fn json_marker_data(&self) -> serde_json::Value { + let filename = self.callsite.file.as_deref(); + let line = self.callsite.line; + let module_path = self.callsite.module_path.as_deref(); + let span_id = self.span.id; + let thread_id = self.span.thread_id; + + let mut value = json!({ + "type": Self::MARKER_TYPE_NAME, + "filename": filename, + "line": line, + "module_path": module_path, + "span_id": span_id, + "thread_id": thread_id, + }); + + if let Some(MemoryStats { resident }) = self.memory_delta { + value["resident"] = json!(resident); + } + + value + } +} + +struct EventMarker<'a> { + event: &'a Event, + callsite: &'a NewCallsite, + memory_delta: Option, +} + +impl<'a> ProfilerMarker for EventMarker<'a> { + const MARKER_TYPE_NAME: &'static str = "tracing-event"; + + fn schema() -> MarkerSchema { + let fields = vec![ + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "filename", + label: "File name", + format: MarkerFieldFormat::FilePath, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "line", + label: "Line", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "module_path", + label: "Module path", + format: MarkerFieldFormat::String, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "parent_span_id", + label: "Parent Span ID", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "thread_id", + label: "Thread ID", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "resident", + label: "Resident set size, measured in bytes while this function was executing", + format: MarkerFieldFormat::Bytes, + searchable: false, + }), + ]; + + MarkerSchema { + type_name: Self::MARKER_TYPE_NAME, + locations: vec![ + MarkerLocation::MarkerTable, + MarkerLocation::MarkerChart, + MarkerLocation::TimelineOverview, + ], + chart_label: None, + tooltip_label: Some("{marker.name} - {marker.data.filename}:{marker.data.line}"), + table_label: Some("{marker.data.filename}:{marker.data.line}"), + fields, + } + } + + fn json_marker_data(&self) -> serde_json::Value { + let filename = self.callsite.file.as_deref(); + let line = self.callsite.line; + let module_path = self.callsite.module_path.as_deref(); + let span_id = self.event.parent_id; + let thread_id = self.event.thread_id; + + let mut value = json!({ + "type": Self::MARKER_TYPE_NAME, + "filename": filename, + "line": line, + "module_path": module_path, + "parent_span_id": span_id, + "thread_id": 
thread_id, + }); + + if let Some(MemoryStats { resident }) = self.memory_delta { + value["resident"] = json!(resident); + } + + value + } +} diff --git a/tracing-trace/src/processor/fmt.rs b/tracing-trace/src/processor/fmt.rs new file mode 100644 index 000000000..68e95c00c --- /dev/null +++ b/tracing-trace/src/processor/fmt.rs @@ -0,0 +1,195 @@ +use std::collections::HashMap; +use std::io::Read; + +use crate::entry::{ + Entry, Event, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter, + SpanExit, SpanId, +}; +use crate::{Error, TraceReader}; + +#[derive(Debug, Clone, Copy)] +enum SpanStatus { + Outside, + Inside(std::time::Duration), +} + +pub fn print_trace(trace: TraceReader) -> Result<(), Error> { + let mut calls = HashMap::new(); + let mut threads = HashMap::new(); + let mut spans = HashMap::new(); + for entry in trace { + let entry = entry?; + match entry { + Entry::NewCallsite(callsite) => { + calls.insert(callsite.call_id, callsite); + } + Entry::NewThread(NewThread { thread_id, name }) => { + threads.insert(thread_id, name); + } + Entry::NewSpan(span) => { + spans.insert(span.id, (span, SpanStatus::Outside)); + } + Entry::SpanEnter(SpanEnter { id, time, memory }) => { + let (span, status) = spans.get_mut(&id).unwrap(); + + let SpanStatus::Outside = status else { + continue; + }; + + *status = SpanStatus::Inside(time); + + let span = *span; + + match memory { + Some(stats) => println!( + "[{}]{}::{} ({}) <-", + print_thread(&threads, span.thread_id), + print_backtrace(&spans, &calls, &span), + print_span(&calls, &span), + print_memory(stats), + ), + None => println!( + "[{}]{}::{} <-", + print_thread(&threads, span.thread_id), + print_backtrace(&spans, &calls, &span), + print_span(&calls, &span), + ), + } + } + Entry::SpanExit(SpanExit { id, time, memory }) => { + let (span, status) = spans.get_mut(&id).unwrap(); + + let SpanStatus::Inside(begin) = status else { + continue; + }; + let begin = *begin; + + *status = SpanStatus::Outside; + + let span = *span; + + match memory { + Some(stats) => println!( + "[{}]{}::{} ({}) -> {}", + print_thread(&threads, span.thread_id), + print_backtrace(&spans, &calls, &span), + print_span(&calls, &span), + print_memory(stats), + print_duration(time - begin), + ), + None => println!( + "[{}]{}::{} -> {}", + print_thread(&threads, span.thread_id), + print_backtrace(&spans, &calls, &span), + print_span(&calls, &span), + print_duration(time - begin), + ), + } + } + Entry::SpanClose(SpanClose { id, time: _ }) => { + spans.remove(&id); + } + Entry::Event(Event { call_id, thread_id, parent_id, time: _, memory }) => { + let parent_span = parent_id.and_then(|parent_id| spans.get(&parent_id)).and_then( + |(span, status)| match status { + SpanStatus::Outside => None, + SpanStatus::Inside(_) => Some(span), + }, + ); + match (parent_span, memory) { + (Some(parent_span), Some(stats)) => println!( + "[{}]{}::{} ({}) event: {}", + print_thread(&threads, thread_id), + print_backtrace(&spans, &calls, parent_span), + print_span(&calls, parent_span), + print_memory(stats), + print_call(&calls, call_id) + ), + (Some(parent_span), None) => println!( + "[{}]{}::{} event: {}", + print_thread(&threads, thread_id), + print_backtrace(&spans, &calls, parent_span), + print_span(&calls, parent_span), + print_call(&calls, call_id) + ), + (None, None) => println!( + "[{}] event: {}", + print_thread(&threads, thread_id), + print_call(&calls, call_id) + ), + (None, Some(stats)) => println!( + "[{}] ({}) event: {}", + print_thread(&threads, thread_id), + 
print_memory(stats), + print_call(&calls, call_id) + ), + } + } + } + } + Ok(()) +} + +fn print_thread(threads: &HashMap>, thread_id: ResourceId) -> String { + let thread = threads.get(&thread_id).unwrap(); + let thread = + thread.as_ref().cloned().unwrap_or_else(|| format!("ThreadId({})", thread_id.to_usize())); + thread +} + +fn print_backtrace( + spans: &HashMap, + calls: &HashMap, + span: &NewSpan, +) -> String { + let mut parents = Vec::new(); + let mut current = span.parent_id; + while let Some(current_id) = &current { + let (span, _) = spans.get(current_id).unwrap(); + let callsite = calls.get(&span.call_id).unwrap(); + parents.push(callsite.name.clone()); + + current = span.parent_id; + } + + let x: Vec = parents.into_iter().rev().map(|x| x.to_string()).collect(); + x.join("::") +} + +fn print_span(calls: &HashMap, span: &NewSpan) -> String { + print_call(calls, span.call_id) +} + +fn print_call(calls: &HashMap, call_id: ResourceId) -> String { + let callsite = calls.get(&call_id).unwrap(); + match (callsite.file.clone(), callsite.line) { + (Some(file), None) => format!("{} ({})", callsite.name, file), + (Some(file), Some(line)) => format!("{} ({}:{})", callsite.name, file, line), + _ => callsite.name.to_string(), + } +} + +fn print_duration(duration: std::time::Duration) -> String { + if duration.as_nanos() < 1000 { + format!("{}ns", duration.as_nanos()) + } else if duration.as_micros() < 1000 { + format!("{}μs", duration.as_micros()) + } else if duration.as_millis() < 1000 { + format!("{}ms", duration.as_millis()) + } else if duration.as_secs() < 120 { + format!("{}s", duration.as_secs()) + } else if duration.as_secs_f64() / 60.0 < 60.0 { + format!("{}min", duration.as_secs_f64() / 60.0) + } else if duration.as_secs_f64() / 3600.0 < 8.0 { + format!("{}h", duration.as_secs_f64() / 3600.0) + } else { + format!("{}d", duration.as_secs_f64() / 3600.0 / 24.0) + } +} + +/// Format the resident set size (RSS) in GiB, MiB, KiB, or bytes. 
+fn print_memory(MemoryStats { resident }: MemoryStats) -> String { + use byte_unit::Byte; + let rss_bytes = Byte::from_bytes(resident).get_appropriate_unit(true); + format!("RSS {rss_bytes:.2}") +} diff --git a/tracing-trace/src/processor/mod.rs b/tracing-trace/src/processor/mod.rs new file mode 100644 index 000000000..ea445b0a5 --- /dev/null +++ b/tracing-trace/src/processor/mod.rs @@ -0,0 +1,3 @@ +pub mod firefox_profiler; +pub mod fmt; +pub mod span_stats; diff --git a/tracing-trace/src/processor/span_stats.rs b/tracing-trace/src/processor/span_stats.rs new file mode 100644 index 000000000..f3e6238ff --- /dev/null +++ b/tracing-trace/src/processor/span_stats.rs @@ -0,0 +1,79 @@ +use std::collections::{BTreeMap, HashMap}; +use std::time::Duration; + +use serde::{Deserialize, Serialize}; + +use crate::entry::{Entry, NewCallsite, SpanClose, SpanEnter, SpanExit}; +use crate::{Error, TraceReader}; + +#[derive(Debug, Clone, Copy)] +enum SpanStatus { + Outside, + Inside(std::time::Duration), +} + +#[derive(Serialize, Deserialize)] +pub struct CallStats { + pub call_count: usize, + pub time: u64, +} + +pub fn to_call_stats( + trace: TraceReader, +) -> Result, Error> { + let mut calls = HashMap::new(); + let mut spans = HashMap::new(); + for entry in trace { + let entry = entry?; + match entry { + Entry::NewCallsite(callsite) => { + calls.insert(callsite.call_id, (callsite, vec![])); + } + Entry::NewThread(_) => {} + Entry::NewSpan(span) => { + spans.insert(span.id, (span, SpanStatus::Outside)); + } + Entry::SpanEnter(SpanEnter { id, time, memory: _ }) => { + let (_, status) = spans.get_mut(&id).unwrap(); + + let SpanStatus::Outside = status else { + continue; + }; + + *status = SpanStatus::Inside(time); + } + Entry::SpanExit(SpanExit { id, time: end, memory: _ }) => { + let (span, status) = spans.get_mut(&id).unwrap(); + + let SpanStatus::Inside(begin) = status else { + continue; + }; + let begin = *begin; + + *status = SpanStatus::Outside; + + let span = *span; + let (_, call_list) = calls.get_mut(&span.call_id).unwrap(); + call_list.push(end - begin); + } + Entry::SpanClose(SpanClose { id, time: _ }) => { + spans.remove(&id); + } + Entry::Event(_) => {} + } + } + + Ok(calls + .into_iter() + .map(|(_, (call_site, calls))| (site_to_string(call_site), calls_to_stats(calls))) + .collect()) +} + +fn site_to_string(call_site: NewCallsite) -> String { + format!("{}::{}", call_site.target, call_site.name) +} +fn calls_to_stats(calls: Vec) -> CallStats { + let nb = calls.len(); + let sum: Duration = calls.iter().sum(); + CallStats { call_count: nb, time: sum.as_nanos() as u64 } +}
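
For context on how the new crate's pieces fit together: the `TraceLayer` streams `Entry` values over an unbounded channel to a `TraceWriter`, which serializes each entry as JSON to its writer; `TraceReader` replays such a file as an iterator of entries, and a processor like `span_stats::to_call_stats` folds the span enter/exit pairs into per-callsite call counts and cumulative time. The snippet below is an illustrative sketch only, not part of this patch, and assumes a `trace.json` file recorded beforehand (for example by the demo binary in tracing-trace/src/main.rs):

use tracing_trace::processor::span_stats::to_call_stats;
use tracing_trace::TraceReader;

fn main() {
    // Replay a previously recorded trace file.
    let input = std::io::BufReader::new(std::fs::File::open("trace.json").unwrap());
    let trace = TraceReader::new(input);

    // Fold every span into per-callsite statistics: number of calls and total time.
    let stats = to_call_stats(trace).unwrap();
    for (callsite, stats) in stats {
        // `time` is the cumulative duration in nanoseconds across all calls to this callsite.
        println!("{callsite}: {} calls, {} ns", stats.call_count, stats.time);
    }
}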