mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 04:44:26 +01:00
Merge #4391
4391: Tracing r=dureuill a=irevoire # Pull Request - [ ] Hide the parameters of the process batch - [x] Make actix-web trace every call on every route - [x] Remove all `env_logger`/`logs` dependencies - [x] Be able to enable or disable the memory measurement using the `/logs` route parameters See the following product discussion: https://github.com/orgs/meilisearch/discussions/721 Supersedes https://github.com/meilisearch/meilisearch/pull/4338 ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/4317 ## What does this PR do? Update the format of the logs from: ``` [2024-02-06T14:54:11Z INFO actix_server::builder] starting 10 workers ``` to ``` 2024-02-06T13:58:14.710803Z INFO actix_server::builder: 200: starting 10 workers ``` First, run meilisearch with the route enabled via the feature flag: - `cargo run --experimental-enable-logs-route` - Or at runtime by sending the following payload: ``` curl \ -X PATCH 'http://localhost:7700/experimental-features/' \ -H 'Content-Type: application/json' \ --data-binary '{ "logsRoute": true }' ``` Then gather data from meilisearch by calling for example: ``` curl \ -X POST http://localhost:7700/logs \ -H 'Content-Type: application/json' \ --data-binary '{ "mode": "fmt", "target": "milli=trace" }' ``` Once your operation is over, tell meilisearch to stop the route: ``` curl \ -X DELETE http://localhost:7700/logs ``` ---- In the case you’re profiling code, you will be interested by the next command that converts the output of the route to a format that the firefox profiler can understand. ```bash cargo run --release --bin trace-to-firefox -- 2024-01-17_17:07:55-indexing-trace.json ``` Then go to https://profiler.firefox.com and load it. Note that we can also share the profiles using the https://share.firefox.dev website. Co-authored-by: Louis Dureuil <louis@meilisearch.com> Co-authored-by: Clément Renault <clement@meilisearch.com> Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
023c2d755f
274
Cargo.lock
generated
274
Cargo.lock
generated
@ -535,6 +535,26 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bindgen"
|
||||
version = "0.68.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"cexpr",
|
||||
"clang-sys",
|
||||
"lazy_static",
|
||||
"lazycell",
|
||||
"peeking_take_while",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"rustc-hash",
|
||||
"shlex",
|
||||
"syn 2.0.48",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bit-set"
|
||||
version = "0.5.3"
|
||||
@ -831,6 +851,15 @@ dependencies = [
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cexpr"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
||||
dependencies = [
|
||||
"nom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
@ -915,6 +944,17 @@ dependencies = [
|
||||
"inout",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clang-sys"
|
||||
version = "1.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1"
|
||||
dependencies = [
|
||||
"glob",
|
||||
"libc",
|
||||
"libloading",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.4.17"
|
||||
@ -961,6 +1001,18 @@ version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15"
|
||||
|
||||
[[package]]
|
||||
name = "color-spantrace"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cd6be1b2a7e382e2b98b43b2adcca6bb0e465af0bdd38123873ae61eb17a72c2"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"owo-colors",
|
||||
"tracing-core",
|
||||
"tracing-error",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colorchoice"
|
||||
version = "1.0.0"
|
||||
@ -1286,6 +1338,15 @@ dependencies = [
|
||||
"syn 2.0.48",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "debugid"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d"
|
||||
dependencies = [
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deduplicating_array"
|
||||
version = "0.1.5"
|
||||
@ -1482,7 +1543,6 @@ dependencies = [
|
||||
"big_s",
|
||||
"flate2",
|
||||
"http 0.2.11",
|
||||
"log",
|
||||
"maplit",
|
||||
"meili-snap",
|
||||
"meilisearch-auth",
|
||||
@ -1496,6 +1556,7 @@ dependencies = [
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"time",
|
||||
"tracing",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
@ -1911,6 +1972,19 @@ dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fxprof-processed-profile"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "27d12c0aed7f1e24276a241aadc4cb8ea9f83000f34bc062b7cc2d51e3b0fabd"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"debugid",
|
||||
"fxhash",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm"
|
||||
version = "0.17.0"
|
||||
@ -2869,11 +2943,9 @@ dependencies = [
|
||||
"file-store",
|
||||
"flate2",
|
||||
"insta",
|
||||
"log",
|
||||
"meili-snap",
|
||||
"meilisearch-auth",
|
||||
"meilisearch-types",
|
||||
"nelson",
|
||||
"page_size 0.5.0",
|
||||
"puffin",
|
||||
"roaring",
|
||||
@ -2883,6 +2955,7 @@ dependencies = [
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"time",
|
||||
"tracing",
|
||||
"ureq",
|
||||
"uuid",
|
||||
]
|
||||
@ -3085,6 +3158,12 @@ version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "lazycell"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
||||
|
||||
[[package]]
|
||||
name = "levenshtein_automata"
|
||||
version = "0.2.1"
|
||||
@ -3112,6 +3191,16 @@ dependencies = [
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libm"
|
||||
version = "0.2.7"
|
||||
@ -3128,6 +3217,17 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libproc"
|
||||
version = "0.14.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "229004ebba9d1d5caf41623f1523b6d52abb47d9f6ab87f7e6fc992e3b854aef"
|
||||
dependencies = [
|
||||
"bindgen",
|
||||
"errno",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libz-sys"
|
||||
version = "1.1.12"
|
||||
@ -3586,7 +3686,6 @@ dependencies = [
|
||||
"deserr",
|
||||
"dump",
|
||||
"either",
|
||||
"env_logger",
|
||||
"file-store",
|
||||
"flate2",
|
||||
"fst",
|
||||
@ -3601,7 +3700,6 @@ dependencies = [
|
||||
"itertools 0.11.0",
|
||||
"jsonwebtoken",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"manifest-dir-macros",
|
||||
"maplit",
|
||||
"meili-snap",
|
||||
@ -3644,6 +3742,10 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"toml",
|
||||
"tracing",
|
||||
"tracing-actix-web",
|
||||
"tracing-subscriber",
|
||||
"tracing-trace",
|
||||
"url",
|
||||
"urlencoding",
|
||||
"uuid",
|
||||
@ -3776,7 +3878,6 @@ dependencies = [
|
||||
"json-depth-checker",
|
||||
"levenshtein_automata",
|
||||
"liquid",
|
||||
"log",
|
||||
"logging_timer",
|
||||
"maplit",
|
||||
"md5",
|
||||
@ -3805,6 +3906,7 @@ dependencies = [
|
||||
"time",
|
||||
"tokenizers",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
@ -3882,9 +3984,10 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nelson"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/meilisearch/nelson.git?rev=675f13885548fb415ead8fbb447e9e6d9314000a#675f13885548fb415ead8fbb447e9e6d9314000a"
|
||||
name = "mutually_exclusive_features"
|
||||
version = "0.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e"
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
@ -3916,6 +4019,16 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu-ansi-term"
|
||||
version = "0.46.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
|
||||
dependencies = [
|
||||
"overload",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-bigint"
|
||||
version = "0.4.3"
|
||||
@ -4037,6 +4150,18 @@ dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "overload"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
|
||||
|
||||
[[package]]
|
||||
name = "owo-colors"
|
||||
version = "3.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f"
|
||||
|
||||
[[package]]
|
||||
name = "page_size"
|
||||
version = "0.5.0"
|
||||
@ -4124,6 +4249,12 @@ dependencies = [
|
||||
"sha2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "peeking_take_while"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
|
||||
|
||||
[[package]]
|
||||
name = "pem"
|
||||
version = "1.1.1"
|
||||
@ -4243,6 +4374,26 @@ dependencies = [
|
||||
"siphasher 0.3.11",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project"
|
||||
version = "1.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0302c4a0442c456bd56f841aee5c3bfd17967563f6fadc9ceb9f9c23cf3807e0"
|
||||
dependencies = [
|
||||
"pin-project-internal",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-internal"
|
||||
version = "1.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.48",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-lite"
|
||||
version = "0.2.13"
|
||||
@ -4994,6 +5145,21 @@ dependencies = [
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sharded-slab"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-registry"
|
||||
version = "1.4.1"
|
||||
@ -5326,6 +5492,16 @@ dependencies = [
|
||||
"syn 2.0.48",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "1.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tiktoken-rs"
|
||||
version = "0.5.8"
|
||||
@ -5554,17 +5730,29 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
|
||||
|
||||
[[package]]
|
||||
name = "tracing"
|
||||
version = "0.1.37"
|
||||
version = "0.1.40"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
|
||||
checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"log",
|
||||
"pin-project-lite",
|
||||
"tracing-attributes",
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-actix-web"
|
||||
version = "0.7.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fe0d5feac3f4ca21ba33496bcb1ccab58cca6412b1405ae80f0581541e0ca78"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"mutually_exclusive_features",
|
||||
"pin-project",
|
||||
"tracing",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-attributes"
|
||||
version = "0.1.27"
|
||||
@ -5578,11 +5766,63 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tracing-core"
|
||||
version = "0.1.31"
|
||||
version = "0.1.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a"
|
||||
checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"valuable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-error"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e"
|
||||
dependencies = [
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-log"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
|
||||
dependencies = [
|
||||
"log",
|
||||
"once_cell",
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-subscriber"
|
||||
version = "0.3.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
|
||||
dependencies = [
|
||||
"nu-ansi-term",
|
||||
"sharded-slab",
|
||||
"smallvec",
|
||||
"thread_local",
|
||||
"tracing-core",
|
||||
"tracing-log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-trace"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"byte-unit",
|
||||
"color-spantrace",
|
||||
"fxprof-processed-profile",
|
||||
"libproc",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"tracing-error",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -5758,6 +5998,12 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "valuable"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.15"
|
||||
|
@ -16,12 +16,16 @@ members = [
|
||||
"json-depth-checker",
|
||||
"benchmarks",
|
||||
"fuzzers",
|
||||
"tracing-trace",
|
||||
"xtask",
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.6.1"
|
||||
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
]
|
||||
description = "Meilisearch HTTP server"
|
||||
homepage = "https://meilisearch.com"
|
||||
readme = "README.md"
|
||||
|
@ -14,7 +14,6 @@ license.workspace = true
|
||||
anyhow = "1.0.79"
|
||||
flate2 = "1.0.28"
|
||||
http = "0.2.11"
|
||||
log = "0.4.20"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
once_cell = "1.19.0"
|
||||
@ -26,6 +25,7 @@ tar = "0.4.40"
|
||||
tempfile = "3.9.0"
|
||||
thiserror = "1.0.56"
|
||||
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.40"
|
||||
uuid = { version = "1.6.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
|
@ -120,7 +120,7 @@ impl From<v1::settings::Settings> for v2::Settings<v2::Unchecked> {
|
||||
criterion.as_ref().map(ToString::to_string)
|
||||
}
|
||||
Err(()) => {
|
||||
log::warn!(
|
||||
tracing::warn!(
|
||||
"Could not import the following ranking rule: `{}`.",
|
||||
ranking_rule
|
||||
);
|
||||
@ -152,11 +152,11 @@ impl From<v1::update::UpdateStatus> for Option<v2::updates::UpdateStatus> {
|
||||
use v2::updates::UpdateStatus as UpdateStatusV2;
|
||||
Some(match source {
|
||||
UpdateStatusV1::Enqueued { content } => {
|
||||
log::warn!(
|
||||
tracing::warn!(
|
||||
"Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)",
|
||||
content.update_id
|
||||
);
|
||||
log::warn!("Task will be skipped in the queue of imported tasks.");
|
||||
tracing::warn!("Task will be skipped in the queue of imported tasks.");
|
||||
|
||||
return None;
|
||||
}
|
||||
@ -229,7 +229,7 @@ impl From<v1::update::UpdateType> for Option<v2::updates::UpdateMeta> {
|
||||
Some(match source {
|
||||
v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments,
|
||||
v1::update::UpdateType::Customs => {
|
||||
log::warn!("Ignoring task with type 'Customs' that is no longer supported");
|
||||
tracing::warn!("Ignoring task with type 'Customs' that is no longer supported");
|
||||
return None;
|
||||
}
|
||||
v1::update::UpdateType::DocumentsAddition { .. } => {
|
||||
@ -296,7 +296,7 @@ impl From<v1::settings::RankingRule> for Option<v2::settings::Criterion> {
|
||||
v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity),
|
||||
v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute),
|
||||
v1::settings::RankingRule::WordsPosition => {
|
||||
log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
|
||||
tracing::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
|
||||
None
|
||||
}
|
||||
v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness),
|
||||
|
@ -146,8 +146,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
|
||||
started_processing_at: processing.started_processing_at,
|
||||
}),
|
||||
Err(e) => {
|
||||
log::warn!("Error with task {}: {}", processing.from.update_id, e);
|
||||
log::warn!("Task will be marked as `Failed`.");
|
||||
tracing::warn!("Error with task {}: {}", processing.from.update_id, e);
|
||||
tracing::warn!("Task will be marked as `Failed`.");
|
||||
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
|
||||
from: v3::updates::Processing {
|
||||
from: v3::updates::Enqueued {
|
||||
@ -172,8 +172,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
|
||||
enqueued_at: enqueued.enqueued_at,
|
||||
}),
|
||||
Err(e) => {
|
||||
log::warn!("Error with task {}: {}", enqueued.update_id, e);
|
||||
log::warn!("Task will be marked as `Failed`.");
|
||||
tracing::warn!("Error with task {}: {}", enqueued.update_id, e);
|
||||
tracing::warn!("Task will be marked as `Failed`.");
|
||||
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
|
||||
from: v3::updates::Processing {
|
||||
from: v3::updates::Enqueued {
|
||||
@ -353,7 +353,7 @@ impl From<String> for v3::Code {
|
||||
"malformed_payload" => v3::Code::MalformedPayload,
|
||||
"missing_payload" => v3::Code::MissingPayload,
|
||||
other => {
|
||||
log::warn!("Unknown error code {}", other);
|
||||
tracing::warn!("Unknown error code {}", other);
|
||||
v3::Code::UnretrievableErrorCode
|
||||
}
|
||||
}
|
||||
|
@ -76,20 +76,20 @@ impl CompatV3ToV4 {
|
||||
let index_uid = match index_uid {
|
||||
Some(uid) => uid,
|
||||
None => {
|
||||
log::warn!(
|
||||
tracing::warn!(
|
||||
"Error while importing the update {}.",
|
||||
task.update.id()
|
||||
);
|
||||
log::warn!(
|
||||
tracing::warn!(
|
||||
"The index associated to the uuid `{}` could not be retrieved.",
|
||||
task.uuid.to_string()
|
||||
);
|
||||
if task.update.is_finished() {
|
||||
// we're fucking with his history but not his data, that's ok-ish.
|
||||
log::warn!("The index-uuid will be set as `unknown`.");
|
||||
tracing::warn!("The index-uuid will be set as `unknown`.");
|
||||
String::from("unknown")
|
||||
} else {
|
||||
log::warn!("The task will be ignored.");
|
||||
tracing::warn!("The task will be ignored.");
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
@ -305,7 +305,7 @@ impl From<v4::ResponseError> for v5::ResponseError {
|
||||
"invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt,
|
||||
"invalid_api_key_description" => v5::Code::InvalidApiKeyDescription,
|
||||
other => {
|
||||
log::warn!("Unknown error code {}", other);
|
||||
tracing::warn!("Unknown error code {}", other);
|
||||
v5::Code::UnretrievableErrorCode
|
||||
}
|
||||
};
|
||||
|
@ -304,7 +304,7 @@ impl From<v5::ResponseError> for v6::ResponseError {
|
||||
"immutable_field" => v6::Code::BadRequest,
|
||||
"api_key_already_exists" => v6::Code::ApiKeyAlreadyExists,
|
||||
other => {
|
||||
log::warn!("Unknown error code {}", other);
|
||||
tracing::warn!("Unknown error code {}", other);
|
||||
v6::Code::UnretrievableErrorCode
|
||||
}
|
||||
};
|
||||
@ -329,7 +329,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
||||
new_ranking_rules.push(new_rule);
|
||||
}
|
||||
Err(_) => {
|
||||
log::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
|
||||
tracing::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2,10 +2,10 @@ use std::fs::{self, File};
|
||||
use std::io::{BufRead, BufReader, ErrorKind};
|
||||
use std::path::Path;
|
||||
|
||||
use log::debug;
|
||||
pub use meilisearch_types::milli;
|
||||
use tempfile::TempDir;
|
||||
use time::OffsetDateTime;
|
||||
use tracing::debug;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::Document;
|
||||
|
@ -19,7 +19,6 @@ dump = { path = "../dump" }
|
||||
enum-iterator = "1.5.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.28"
|
||||
log = "0.4.20"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
page_size = "0.5.0"
|
||||
@ -30,7 +29,13 @@ serde_json = { version = "1.0.111", features = ["preserve_order"] }
|
||||
synchronoise = "1.0.1"
|
||||
tempfile = "3.9.0"
|
||||
thiserror = "1.0.56"
|
||||
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
time = { version = "0.3.31", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tracing = "0.1.40"
|
||||
ureq = "2.9.1"
|
||||
uuid = { version = "1.6.1", features = ["serde", "v4"] }
|
||||
|
||||
@ -39,4 +44,3 @@ big_s = "1.0.2"
|
||||
crossbeam = "0.8.4"
|
||||
insta = { version = "1.34.0", features = ["json", "redactions"] }
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}
|
||||
|
@ -24,7 +24,6 @@ use std::fs::{self, File};
|
||||
use std::io::BufWriter;
|
||||
|
||||
use dump::IndexMetadata;
|
||||
use log::{debug, error, info, trace};
|
||||
use meilisearch_types::error::Code;
|
||||
use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
|
||||
@ -514,6 +513,7 @@ impl IndexScheduler {
|
||||
/// 3. We get the *next* snapshot to process.
|
||||
/// 4. We get the *next* dump to process.
|
||||
/// 5. We get the *next* tasks to process for a specific index.
|
||||
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
|
||||
pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result<Option<Batch>> {
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
|
||||
@ -619,6 +619,7 @@ impl IndexScheduler {
|
||||
/// The list of tasks that were processed. The metadata of each task in the returned
|
||||
/// list is updated accordingly, with the exception of the its date fields
|
||||
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
|
||||
#[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))]
|
||||
pub(crate) fn process_batch(&self, batch: Batch) -> Result<Vec<Task>> {
|
||||
#[cfg(test)]
|
||||
{
|
||||
@ -668,9 +669,10 @@ impl IndexScheduler {
|
||||
Ok(()) => {
|
||||
for content_uuid in canceled_tasks_content_uuids {
|
||||
if let Err(error) = self.delete_update_file(content_uuid) {
|
||||
error!(
|
||||
"We failed deleting the content file indentified as {}: {}",
|
||||
content_uuid, error
|
||||
tracing::error!(
|
||||
file_content_uuid = %content_uuid,
|
||||
%error,
|
||||
"Failed deleting content file"
|
||||
)
|
||||
}
|
||||
}
|
||||
@ -969,7 +971,10 @@ impl IndexScheduler {
|
||||
|
||||
match res {
|
||||
Ok(_) => (),
|
||||
Err(e) => error!("Could not write the stats of the index {}", e),
|
||||
Err(e) => tracing::error!(
|
||||
error = &e as &dyn std::error::Error,
|
||||
"Could not write the stats of the index"
|
||||
),
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
@ -997,7 +1002,7 @@ impl IndexScheduler {
|
||||
builder.set_primary_key(primary_key);
|
||||
let must_stop_processing = self.must_stop_processing.clone();
|
||||
builder.execute(
|
||||
|indexing_step| debug!("update: {:?}", indexing_step),
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)?;
|
||||
index_wtxn.commit()?;
|
||||
@ -1024,7 +1029,10 @@ impl IndexScheduler {
|
||||
|
||||
match res {
|
||||
Ok(_) => (),
|
||||
Err(e) => error!("Could not write the stats of the index {}", e),
|
||||
Err(e) => tracing::error!(
|
||||
error = &e as &dyn std::error::Error,
|
||||
"Could not write the stats of the index"
|
||||
),
|
||||
}
|
||||
|
||||
Ok(vec![task])
|
||||
@ -1143,6 +1151,11 @@ impl IndexScheduler {
|
||||
///
|
||||
/// ## Return
|
||||
/// The list of processed tasks.
|
||||
#[tracing::instrument(
|
||||
level = "trace",
|
||||
skip(self, index_wtxn, index),
|
||||
target = "indexing::scheduler"
|
||||
)]
|
||||
fn apply_index_operation<'i>(
|
||||
&self,
|
||||
index_wtxn: &mut RwTxn<'i>,
|
||||
@ -1203,7 +1216,7 @@ impl IndexScheduler {
|
||||
milli::update::Settings::new(index_wtxn, index, indexer_config);
|
||||
builder.set_primary_key(primary_key);
|
||||
builder.execute(
|
||||
|indexing_step| debug!("update: {:?}", indexing_step),
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.clone().get(),
|
||||
)?;
|
||||
primary_key_has_been_set = true;
|
||||
@ -1222,7 +1235,7 @@ impl IndexScheduler {
|
||||
index,
|
||||
indexer_config,
|
||||
config,
|
||||
|indexing_step| trace!("update: {:?}", indexing_step),
|
||||
|indexing_step| tracing::trace!(?indexing_step, "Update"),
|
||||
|| must_stop_processing.get(),
|
||||
)?;
|
||||
|
||||
@ -1294,7 +1307,7 @@ impl IndexScheduler {
|
||||
|
||||
if !tasks.iter().all(|res| res.error.is_some()) {
|
||||
let addition = builder.execute()?;
|
||||
info!("document addition done: {:?}", addition);
|
||||
tracing::info!(indexing_result = ?addition, "document indexing done");
|
||||
} else if primary_key_has_been_set {
|
||||
// Everything failed but we've set a primary key.
|
||||
// We need to remove it.
|
||||
@ -1302,7 +1315,7 @@ impl IndexScheduler {
|
||||
milli::update::Settings::new(index_wtxn, index, indexer_config);
|
||||
builder.reset_primary_key();
|
||||
builder.execute(
|
||||
|indexing_step| trace!("update: {:?}", indexing_step),
|
||||
|indexing_step| tracing::trace!(update = ?indexing_step),
|
||||
|| must_stop_processing.clone().get(),
|
||||
)?;
|
||||
}
|
||||
@ -1372,7 +1385,7 @@ impl IndexScheduler {
|
||||
|
||||
let must_stop_processing = self.must_stop_processing.clone();
|
||||
builder.execute(
|
||||
|indexing_step| debug!("update: {:?}", indexing_step),
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)?;
|
||||
|
||||
@ -1584,7 +1597,7 @@ fn delete_document_by_filter<'a>(
|
||||
index,
|
||||
indexer_config,
|
||||
config,
|
||||
|indexing_step| debug!("update: {:?}", indexing_step),
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)?;
|
||||
|
||||
|
@ -43,6 +43,19 @@ impl RoFeatures {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_logs_route(&self) -> Result<()> {
|
||||
if self.runtime.logs_route {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action: "getting logs through the `/logs/stream` route",
|
||||
feature: "logs route",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/721",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.vector_store {
|
||||
Ok(())
|
||||
@ -81,6 +94,7 @@ impl FeatureData {
|
||||
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
|
||||
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
|
||||
metrics: instance_features.metrics || persisted_features.metrics,
|
||||
logs_route: instance_features.logs_route || persisted_features.logs_route,
|
||||
..persisted_features
|
||||
}));
|
||||
|
||||
|
@ -3,13 +3,13 @@ use std::sync::{Arc, RwLock};
|
||||
use std::time::Duration;
|
||||
use std::{fs, thread};
|
||||
|
||||
use log::error;
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::{FieldDistribution, Index};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
use tracing::error;
|
||||
use uuid::Uuid;
|
||||
|
||||
use self::index_map::IndexMap;
|
||||
|
@ -535,17 +535,17 @@ impl IndexScheduler {
|
||||
let budget = if Self::is_good_heed(tasks_path, DEFAULT_BUDGET) {
|
||||
DEFAULT_BUDGET
|
||||
} else {
|
||||
log::debug!("determining budget with dichotomic search");
|
||||
tracing::debug!("determining budget with dichotomic search");
|
||||
utils::dichotomic_search(DEFAULT_BUDGET / 2, |map_size| {
|
||||
Self::is_good_heed(tasks_path, map_size)
|
||||
})
|
||||
};
|
||||
|
||||
log::debug!("memmap budget: {budget}B");
|
||||
tracing::debug!("memmap budget: {budget}B");
|
||||
let mut budget = budget / 2;
|
||||
if task_db_size > (budget / 2) {
|
||||
task_db_size = clamp_to_page_size(budget * 2 / 5);
|
||||
log::debug!(
|
||||
tracing::debug!(
|
||||
"Decreasing max size of task DB to {task_db_size}B due to constrained memory space"
|
||||
);
|
||||
}
|
||||
@ -555,13 +555,13 @@ impl IndexScheduler {
|
||||
let budget = budget;
|
||||
let task_db_size = task_db_size;
|
||||
|
||||
log::debug!("index budget: {budget}B");
|
||||
tracing::debug!("index budget: {budget}B");
|
||||
let mut index_count = budget / base_map_size;
|
||||
if index_count < 2 {
|
||||
// take a bit less than half than the budget to make sure we can always afford to open an index
|
||||
let map_size = (budget * 2) / 5;
|
||||
// single index of max budget
|
||||
log::debug!("1 index of {map_size}B can be opened simultaneously.");
|
||||
tracing::debug!("1 index of {map_size}B can be opened simultaneously.");
|
||||
return IndexBudget { map_size, index_count: 1, task_db_size };
|
||||
}
|
||||
// give us some space for an additional index when the cache is already full
|
||||
@ -570,7 +570,7 @@ impl IndexScheduler {
|
||||
if index_count > max_index_count {
|
||||
index_count = max_index_count;
|
||||
}
|
||||
log::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously.");
|
||||
tracing::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously.");
|
||||
IndexBudget { map_size: base_map_size, index_count, task_db_size }
|
||||
}
|
||||
|
||||
@ -617,7 +617,7 @@ impl IndexScheduler {
|
||||
Ok(TickOutcome::TickAgain(_)) => (),
|
||||
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
|
||||
Err(e) => {
|
||||
log::error!("{e}");
|
||||
tracing::error!("{e}");
|
||||
// Wait one second when an irrecoverable error occurs.
|
||||
if !e.is_recoverable() {
|
||||
std::thread::sleep(Duration::from_secs(1));
|
||||
@ -634,15 +634,15 @@ impl IndexScheduler {
|
||||
let mut file = match File::create(format!("{}.puffin", now)) {
|
||||
Ok(file) => file,
|
||||
Err(e) => {
|
||||
log::error!("{e}");
|
||||
tracing::error!("{e}");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Err(e) = frame_view.save_to_writer(&mut file) {
|
||||
log::error!("{e}");
|
||||
tracing::error!("{e}");
|
||||
}
|
||||
if let Err(e) = file.sync_all() {
|
||||
log::error!("{e}");
|
||||
tracing::error!("{e}");
|
||||
}
|
||||
// We erase this frame view as it is no more useful. We want to
|
||||
// measure the new frames now that we exported the previous ones.
|
||||
@ -1190,10 +1190,10 @@ impl IndexScheduler {
|
||||
self.update_task(&mut wtxn, &task)
|
||||
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
|
||||
if let Err(e) = self.delete_persisted_task_data(&task) {
|
||||
log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
|
||||
tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
|
||||
}
|
||||
}
|
||||
log::info!("A batch of tasks was successfully completed.");
|
||||
tracing::info!("A batch of tasks was successfully completed.");
|
||||
}
|
||||
// If we have an abortion error we must stop the tick here and re-schedule tasks.
|
||||
Err(Error::Milli(milli::Error::InternalError(
|
||||
@ -1247,7 +1247,7 @@ impl IndexScheduler {
|
||||
self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?;
|
||||
|
||||
if let Err(e) = self.delete_persisted_task_data(&task) {
|
||||
log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
|
||||
tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
|
||||
}
|
||||
self.update_task(&mut wtxn, &task)
|
||||
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
|
||||
@ -1341,7 +1341,7 @@ impl IndexScheduler {
|
||||
};
|
||||
|
||||
if let Err(e) = request.send(reader) {
|
||||
log::error!("While sending data to the webhook: {e}");
|
||||
tracing::error!("While sending data to the webhook: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
@ -1367,12 +1367,12 @@ impl IndexScheduler {
|
||||
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
|
||||
// the deletion tasks we enqueued ourselves.
|
||||
if to_delete.len() < 2 {
|
||||
log::warn!("The task queue is almost full, but no task can be deleted yet.");
|
||||
tracing::warn!("The task queue is almost full, but no task can be deleted yet.");
|
||||
// the only thing we can do is hope that the user tasks are going to finish
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
log::info!(
|
||||
tracing::info!(
|
||||
"The task queue is almost full. Deleting the oldest {} finished tasks.",
|
||||
to_delete.len()
|
||||
);
|
||||
|
@ -310,6 +310,8 @@ TooManyVectors , InvalidRequest , BAD_REQUEST ;
|
||||
UnretrievableDocument , Internal , BAD_REQUEST ;
|
||||
UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
|
||||
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
|
||||
|
||||
// Experimental features
|
||||
VectorEmbeddingError , InvalidRequest , BAD_REQUEST
|
||||
}
|
||||
|
||||
|
@ -5,10 +5,12 @@ use serde::{Deserialize, Serialize};
|
||||
pub struct RuntimeTogglableFeatures {
|
||||
pub vector_store: bool,
|
||||
pub metrics: bool,
|
||||
pub logs_route: bool,
|
||||
pub export_puffin_reports: bool,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, Copy)]
|
||||
pub struct InstanceTogglableFeatures {
|
||||
pub metrics: bool,
|
||||
pub logs_route: bool,
|
||||
}
|
||||
|
@ -42,7 +42,6 @@ crossbeam-channel = "0.5.11"
|
||||
deserr = { version = "0.6.1", features = ["actix-web"] }
|
||||
dump = { path = "../dump" }
|
||||
either = "1.9.0"
|
||||
env_logger = "0.10.1"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.28"
|
||||
fst = "0.4.7"
|
||||
@ -55,7 +54,6 @@ is-terminal = "0.4.10"
|
||||
itertools = "0.11.0"
|
||||
jsonwebtoken = "8.3.0"
|
||||
lazy_static = "1.4.0"
|
||||
log = "0.4.20"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
mimalloc = { version = "0.1.39", default-features = false }
|
||||
@ -105,6 +103,10 @@ yaup = "0.2.1"
|
||||
serde_urlencoded = "0.7.1"
|
||||
termcolor = "1.4.1"
|
||||
url = { version = "2.5.0", features = ["serde"] }
|
||||
tracing = "0.1.40"
|
||||
tracing-subscriber = "0.3.18"
|
||||
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
|
||||
tracing-actix-web = "0.7.9"
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.9.0"
|
||||
|
@ -250,6 +250,7 @@ impl super::Analytics for SegmentAnalytics {
|
||||
struct Infos {
|
||||
env: String,
|
||||
experimental_enable_metrics: bool,
|
||||
experimental_enable_logs_route: bool,
|
||||
experimental_reduce_indexing_memory_usage: bool,
|
||||
experimental_max_number_of_batched_tasks: usize,
|
||||
db_path: bool,
|
||||
@ -287,6 +288,7 @@ impl From<Opt> for Infos {
|
||||
let Opt {
|
||||
db_path,
|
||||
experimental_enable_metrics,
|
||||
experimental_enable_logs_route,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_max_number_of_batched_tasks,
|
||||
http_addr,
|
||||
@ -333,6 +335,7 @@ impl From<Opt> for Infos {
|
||||
Self {
|
||||
env,
|
||||
experimental_enable_metrics,
|
||||
experimental_enable_logs_route,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
db_path: db_path != PathBuf::from("./data.ms"),
|
||||
import_dump: import_dump.is_some(),
|
||||
|
@ -12,6 +12,8 @@ pub enum MeilisearchHttpError {
|
||||
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
|
||||
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
|
||||
MissingContentType(Vec<String>),
|
||||
#[error("The `/logs/stream` route is currently in use by someone else.")]
|
||||
AlreadyUsedLogRoute,
|
||||
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
|
||||
CsvDelimiterWithWrongContentType(String),
|
||||
#[error(
|
||||
@ -59,6 +61,7 @@ impl ErrorCode for MeilisearchHttpError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
|
||||
MeilisearchHttpError::AlreadyUsedLogRoute => Code::BadRequest,
|
||||
MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
|
||||
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
|
||||
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
|
||||
|
@ -29,7 +29,6 @@ use error::PayloadError;
|
||||
use extractors::payload::PayloadConfig;
|
||||
use http::header::CONTENT_TYPE;
|
||||
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
|
||||
use log::error;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
|
||||
@ -39,6 +38,8 @@ use meilisearch_types::versioning::{check_version_file, create_version_file};
|
||||
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
|
||||
pub use option::Opt;
|
||||
use option::ScheduleSnapshot;
|
||||
use tracing::{error, info_span};
|
||||
use tracing_subscriber::filter::Targets;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
|
||||
@ -86,10 +87,21 @@ fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
/// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`.
|
||||
pub type LogRouteHandle =
|
||||
tracing_subscriber::reload::Handle<LogRouteType, tracing_subscriber::Registry>;
|
||||
|
||||
pub type LogRouteType = tracing_subscriber::filter::Filtered<
|
||||
Option<Box<dyn tracing_subscriber::Layer<tracing_subscriber::Registry> + Send + Sync>>,
|
||||
Targets,
|
||||
tracing_subscriber::Registry,
|
||||
>;
|
||||
|
||||
pub fn create_app(
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth_controller: Data<AuthController>,
|
||||
opt: Opt,
|
||||
logs: LogRouteHandle,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
enable_dashboard: bool,
|
||||
) -> actix_web::App<
|
||||
@ -108,6 +120,7 @@ pub fn create_app(
|
||||
index_scheduler.clone(),
|
||||
auth_controller.clone(),
|
||||
&opt,
|
||||
logs,
|
||||
analytics.clone(),
|
||||
)
|
||||
})
|
||||
@ -123,11 +136,49 @@ pub fn create_app(
|
||||
.allow_any_method()
|
||||
.max_age(86_400), // 24h
|
||||
)
|
||||
.wrap(actix_web::middleware::Logger::default())
|
||||
.wrap(tracing_actix_web::TracingLogger::<AwebTracingLogger>::new())
|
||||
.wrap(actix_web::middleware::Compress::default())
|
||||
.wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
|
||||
}
|
||||
|
||||
struct AwebTracingLogger;
|
||||
|
||||
impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
|
||||
fn on_request_start(request: &actix_web::dev::ServiceRequest) -> tracing::Span {
|
||||
use tracing::field::Empty;
|
||||
|
||||
let conn_info = request.connection_info();
|
||||
let headers = request.headers();
|
||||
let user_agent = headers
|
||||
.get(http::header::USER_AGENT)
|
||||
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
|
||||
.unwrap_or_default();
|
||||
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
|
||||
}
|
||||
|
||||
fn on_request_end<B: MessageBody>(
|
||||
span: tracing::Span,
|
||||
outcome: &Result<ServiceResponse<B>, actix_web::Error>,
|
||||
) {
|
||||
match &outcome {
|
||||
Ok(response) => {
|
||||
let code: i32 = response.response().status().as_u16().into();
|
||||
span.record("status_code", code);
|
||||
|
||||
if let Some(error) = response.response().error() {
|
||||
// use the status code already constructed for the outgoing HTTP response
|
||||
span.record("error", &tracing::field::display(error.as_response_error()));
|
||||
}
|
||||
}
|
||||
Err(error) => {
|
||||
let code: i32 = error.error_response().status().as_u16().into();
|
||||
span.record("status_code", code);
|
||||
span.record("error", &tracing::field::display(error.as_response_error()));
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
enum OnFailure {
|
||||
RemoveDb,
|
||||
KeepDb,
|
||||
@ -280,15 +331,15 @@ fn import_dump(
|
||||
let mut dump_reader = dump::DumpReader::open(reader)?;
|
||||
|
||||
if let Some(date) = dump_reader.date() {
|
||||
log::info!(
|
||||
"Importing a dump of meilisearch `{:?}` from the {}",
|
||||
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
|
||||
date
|
||||
tracing::info!(
|
||||
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
|
||||
%date,
|
||||
"Importing a dump of meilisearch"
|
||||
);
|
||||
} else {
|
||||
log::info!(
|
||||
"Importing a dump of meilisearch `{:?}`",
|
||||
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
|
||||
tracing::info!(
|
||||
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
|
||||
"Importing a dump of meilisearch",
|
||||
);
|
||||
}
|
||||
|
||||
@ -322,7 +373,7 @@ fn import_dump(
|
||||
for index_reader in dump_reader.indexes()? {
|
||||
let mut index_reader = index_reader?;
|
||||
let metadata = index_reader.metadata();
|
||||
log::info!("Importing index `{}`.", metadata.uid);
|
||||
tracing::info!("Importing index `{}`.", metadata.uid);
|
||||
|
||||
let date = Some((metadata.created_at, metadata.updated_at));
|
||||
let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
|
||||
@ -336,14 +387,15 @@ fn import_dump(
|
||||
}
|
||||
|
||||
// 4.2 Import the settings.
|
||||
log::info!("Importing the settings.");
|
||||
tracing::info!("Importing the settings.");
|
||||
let settings = index_reader.settings()?;
|
||||
apply_settings_to_builder(&settings, &mut builder);
|
||||
builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?;
|
||||
builder
|
||||
.execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
|
||||
|
||||
// 4.3 Import the documents.
|
||||
// 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
|
||||
log::info!("Importing the documents.");
|
||||
tracing::info!("Importing the documents.");
|
||||
let file = tempfile::tempfile()?;
|
||||
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
|
||||
for document in index_reader.documents()? {
|
||||
@ -365,15 +417,16 @@ fn import_dump(
|
||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
..Default::default()
|
||||
},
|
||||
|indexing_step| log::trace!("update: {:?}", indexing_step),
|
||||
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|
||||
|| false,
|
||||
)?;
|
||||
|
||||
let (builder, user_result) = builder.add_documents(reader)?;
|
||||
log::info!("{} documents found.", user_result?);
|
||||
let user_result = user_result?;
|
||||
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
|
||||
builder.execute()?;
|
||||
wtxn.commit()?;
|
||||
log::info!("All documents successfully imported.");
|
||||
tracing::info!("All documents successfully imported.");
|
||||
}
|
||||
|
||||
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
|
||||
@ -391,6 +444,7 @@ pub fn configure_data(
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth: Data<AuthController>,
|
||||
opt: &Opt,
|
||||
logs: LogRouteHandle,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
) {
|
||||
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
|
||||
@ -398,6 +452,7 @@ pub fn configure_data(
|
||||
.app_data(index_scheduler)
|
||||
.app_data(auth)
|
||||
.app_data(web::Data::from(analytics))
|
||||
.app_data(web::Data::new(logs))
|
||||
.app_data(
|
||||
web::JsonConfig::default()
|
||||
.limit(http_payload_size_limit)
|
||||
|
@ -1,6 +1,7 @@
|
||||
use std::env;
|
||||
use std::io::{stderr, Write};
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use actix_web::http::KeepAlive;
|
||||
@ -9,37 +10,60 @@ use actix_web::HttpServer;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use is_terminal::IsTerminal;
|
||||
use meilisearch::analytics::Analytics;
|
||||
use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt};
|
||||
use meilisearch::{
|
||||
analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, LogRouteType, Opt,
|
||||
};
|
||||
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
|
||||
use mimalloc::MiMalloc;
|
||||
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
||||
use tracing::level_filters::LevelFilter;
|
||||
use tracing_subscriber::layer::SubscriberExt as _;
|
||||
use tracing_subscriber::Layer;
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
static ALLOC: MiMalloc = MiMalloc;
|
||||
|
||||
fn default_layer() -> LogRouteType {
|
||||
None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF))
|
||||
}
|
||||
|
||||
/// does all the setup before meilisearch is launched
|
||||
fn setup(opt: &Opt) -> anyhow::Result<()> {
|
||||
let mut log_builder = env_logger::Builder::new();
|
||||
let log_filters = format!(
|
||||
"{},h2=warn,hyper=warn,tokio_util=warn,tracing=warn,rustls=warn,mio=warn,reqwest=warn",
|
||||
opt.log_level
|
||||
fn setup(opt: &Opt) -> anyhow::Result<LogRouteHandle> {
|
||||
let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(default_layer());
|
||||
let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer;
|
||||
|
||||
let subscriber = tracing_subscriber::registry().with(route_layer).with(
|
||||
tracing_subscriber::fmt::layer()
|
||||
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE)
|
||||
.with_filter(
|
||||
tracing_subscriber::filter::LevelFilter::from_str(&opt.log_level.to_string())
|
||||
.unwrap(),
|
||||
),
|
||||
);
|
||||
log_builder.parse_filters(&log_filters);
|
||||
|
||||
log_builder.init();
|
||||
// set the subscriber as the default for the application
|
||||
tracing::subscriber::set_global_default(subscriber).unwrap();
|
||||
|
||||
Ok(())
|
||||
Ok(route_layer_handle)
|
||||
}
|
||||
|
||||
fn on_panic(info: &std::panic::PanicInfo) {
|
||||
let info = info.to_string().replace('\n', " ");
|
||||
tracing::error!(%info);
|
||||
}
|
||||
|
||||
#[actix_web::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let (opt, config_read_from) = Opt::try_build()?;
|
||||
|
||||
std::panic::set_hook(Box::new(on_panic));
|
||||
|
||||
anyhow::ensure!(
|
||||
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
|
||||
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
|
||||
);
|
||||
|
||||
setup(&opt)?;
|
||||
let log_handle = setup(&opt)?;
|
||||
|
||||
match (opt.env.as_ref(), &opt.master_key) {
|
||||
("production", Some(master_key)) if master_key.len() < MASTER_KEY_MIN_SIZE => {
|
||||
@ -77,7 +101,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
print_launch_resume(&opt, analytics.clone(), config_read_from);
|
||||
|
||||
run_http(index_scheduler, auth_controller, opt, analytics).await?;
|
||||
run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -86,6 +110,7 @@ async fn run_http(
|
||||
index_scheduler: Arc<IndexScheduler>,
|
||||
auth_controller: Arc<AuthController>,
|
||||
opt: Opt,
|
||||
logs: LogRouteHandle,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
) -> anyhow::Result<()> {
|
||||
let enable_dashboard = &opt.env == "development";
|
||||
@ -98,6 +123,7 @@ async fn run_http(
|
||||
index_scheduler.clone(),
|
||||
auth_controller.clone(),
|
||||
opt.clone(),
|
||||
logs.clone(),
|
||||
analytics.clone(),
|
||||
enable_dashboard,
|
||||
)
|
||||
|
@ -51,6 +51,7 @@ const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP";
|
||||
const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
|
||||
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
|
||||
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
|
||||
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
|
||||
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
|
||||
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
|
||||
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
|
||||
@ -309,6 +310,13 @@ pub struct Opt {
|
||||
#[serde(default)]
|
||||
pub experimental_enable_metrics: bool,
|
||||
|
||||
/// Experimental logs route feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/721>
|
||||
///
|
||||
/// Enables the log route on the `POST /logs/stream` endpoint and the `DELETE /logs/stream` to stop receiving logs.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)]
|
||||
#[serde(default)]
|
||||
pub experimental_enable_logs_route: bool,
|
||||
|
||||
/// Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)]
|
||||
#[serde(default)]
|
||||
@ -414,6 +422,7 @@ impl Opt {
|
||||
#[cfg(feature = "analytics")]
|
||||
no_analytics,
|
||||
experimental_enable_metrics,
|
||||
experimental_enable_logs_route,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
} = self;
|
||||
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
|
||||
@ -470,6 +479,10 @@ impl Opt {
|
||||
MEILI_EXPERIMENTAL_ENABLE_METRICS,
|
||||
experimental_enable_metrics.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE,
|
||||
experimental_enable_logs_route.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
|
||||
experimental_reduce_indexing_memory_usage.to_string(),
|
||||
@ -524,7 +537,10 @@ impl Opt {
|
||||
}
|
||||
|
||||
pub(crate) fn to_instance_features(&self) -> InstanceTogglableFeatures {
|
||||
InstanceTogglableFeatures { metrics: self.experimental_enable_metrics }
|
||||
InstanceTogglableFeatures {
|
||||
metrics: self.experimental_enable_metrics,
|
||||
logs_route: self.experimental_enable_logs_route,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,11 +1,11 @@
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use serde_json::json;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
@ -32,6 +32,6 @@ pub async fn create_dump(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Create dump");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
@ -3,11 +3,11 @@ use actix_web::{HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use deserr::Deserr;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::keys::actions;
|
||||
use serde_json::json;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
@ -33,8 +33,9 @@ async fn get_features(
|
||||
let features = index_scheduler.features();
|
||||
|
||||
analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
|
||||
debug!("returns: {:?}", features.runtime_features());
|
||||
HttpResponse::Ok().json(features.runtime_features())
|
||||
let features = features.runtime_features();
|
||||
debug!(returns = ?features, "Get features");
|
||||
HttpResponse::Ok().json(features)
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr)]
|
||||
@ -45,6 +46,8 @@ pub struct RuntimeTogglableFeatures {
|
||||
#[deserr(default)]
|
||||
pub metrics: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub logs_route: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub export_puffin_reports: Option<bool>,
|
||||
}
|
||||
|
||||
@ -58,11 +61,13 @@ async fn patch_features(
|
||||
analytics: Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let features = index_scheduler.features();
|
||||
debug!(parameters = ?new_features, "Patch features");
|
||||
|
||||
let old_features = features.runtime_features();
|
||||
let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
|
||||
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
|
||||
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
|
||||
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
|
||||
export_puffin_reports: new_features
|
||||
.0
|
||||
.export_puffin_reports
|
||||
@ -75,6 +80,7 @@ async fn patch_features(
|
||||
let meilisearch_types::features::RuntimeTogglableFeatures {
|
||||
vector_store,
|
||||
metrics,
|
||||
logs_route,
|
||||
export_puffin_reports,
|
||||
} = new_features;
|
||||
|
||||
@ -83,10 +89,12 @@ async fn patch_features(
|
||||
json!({
|
||||
"vector_store": vector_store,
|
||||
"metrics": metrics,
|
||||
"logs_route": logs_route,
|
||||
"export_puffin_reports": export_puffin_reports,
|
||||
}),
|
||||
Some(&req),
|
||||
);
|
||||
index_scheduler.put_runtime_features(new_features)?;
|
||||
debug!(returns = ?new_features, "Patch features");
|
||||
Ok(HttpResponse::Ok().json(new_features))
|
||||
}
|
||||
|
@ -8,7 +8,6 @@ use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::Deserr;
|
||||
use futures::StreamExt;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
|
||||
@ -28,6 +27,7 @@ use serde_json::Value;
|
||||
use tempfile::tempfile;
|
||||
use tokio::fs::File;
|
||||
use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
|
||||
use crate::error::MeilisearchHttpError;
|
||||
@ -101,6 +101,7 @@ pub async fn get_document(
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let DocumentParam { index_uid, document_id } = document_param.into_inner();
|
||||
debug!(parameters = ?params, "Get document");
|
||||
let index_uid = IndexUid::try_from(index_uid)?;
|
||||
|
||||
analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
|
||||
@ -110,7 +111,7 @@ pub async fn get_document(
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?;
|
||||
debug!("returns: {:?}", document);
|
||||
debug!(returns = ?document, "Get document");
|
||||
Ok(HttpResponse::Ok().json(document))
|
||||
}
|
||||
|
||||
@ -131,7 +132,7 @@ pub async fn delete_document(
|
||||
};
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Delete document");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@ -168,9 +169,8 @@ pub async fn documents_by_query_post(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with body: {:?}", body);
|
||||
|
||||
let body = body.into_inner();
|
||||
debug!(parameters = ?body, "Get documents POST");
|
||||
|
||||
analytics.post_fetch_documents(
|
||||
&DocumentFetchKind::Normal {
|
||||
@ -191,7 +191,7 @@ pub async fn get_documents(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", params);
|
||||
debug!(parameters = ?params, "Get documents GET");
|
||||
|
||||
let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
|
||||
|
||||
@ -235,7 +235,7 @@ fn documents_by_query(
|
||||
|
||||
let ret = PaginationView::new(offset, limit, total as usize, documents);
|
||||
|
||||
debug!("returns: {:?}", ret);
|
||||
debug!(returns = ?ret, "Get documents");
|
||||
Ok(HttpResponse::Ok().json(ret))
|
||||
}
|
||||
|
||||
@ -271,7 +271,7 @@ pub async fn replace_documents(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
debug!("called with params: {:?}", params);
|
||||
debug!(parameters = ?params, "Replace documents");
|
||||
let params = params.into_inner();
|
||||
|
||||
analytics.add_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req);
|
||||
@ -288,6 +288,7 @@ pub async fn replace_documents(
|
||||
allow_index_creation,
|
||||
)
|
||||
.await?;
|
||||
debug!(returns = ?task, "Replace documents");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
@ -302,8 +303,8 @@ pub async fn update_documents(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
debug!("called with params: {:?}", params);
|
||||
let params = params.into_inner();
|
||||
debug!(parameters = ?params, "Update documents");
|
||||
|
||||
analytics.update_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req);
|
||||
|
||||
@ -319,6 +320,7 @@ pub async fn update_documents(
|
||||
allow_index_creation,
|
||||
)
|
||||
.await?;
|
||||
debug!(returns = ?task, "Update documents");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
@ -427,7 +429,10 @@ async fn document_addition(
|
||||
Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e)))
|
||||
if e.kind() == ErrorKind::NotFound => {}
|
||||
Err(e) => {
|
||||
log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}");
|
||||
tracing::warn!(
|
||||
index_uuid = %uuid,
|
||||
"Unknown error happened while deleting a malformed update file: {e}"
|
||||
);
|
||||
}
|
||||
}
|
||||
// We still want to return the original error to the end user.
|
||||
@ -453,7 +458,6 @@ async fn document_addition(
|
||||
}
|
||||
};
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(task.into())
|
||||
}
|
||||
|
||||
@ -464,7 +468,7 @@ pub async fn delete_documents_batch(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", body);
|
||||
debug!(parameters = ?body, "Delete documents by batch");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
|
||||
@ -479,7 +483,7 @@ pub async fn delete_documents_batch(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Delete documents by batch");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@ -497,7 +501,7 @@ pub async fn delete_documents_by_filter(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", body);
|
||||
debug!(parameters = ?body, "Delete documents by filter");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let index_uid = index_uid.into_inner();
|
||||
let filter = body.into_inner().filter;
|
||||
@ -515,7 +519,7 @@ pub async fn delete_documents_by_filter(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Delete documents by filter");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@ -532,7 +536,7 @@ pub async fn clear_all_documents(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Delete all documents");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
|
@ -2,12 +2,12 @@ use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use serde_json::Value;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::{Analytics, FacetSearchAggregator};
|
||||
use crate::extractors::authentication::policies::*;
|
||||
@ -56,7 +56,7 @@ pub async fn search(
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let query = params.into_inner();
|
||||
debug!("facet search called with params: {:?}", query);
|
||||
debug!(parameters = ?query, "Facet search");
|
||||
|
||||
let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
|
||||
|
||||
@ -83,7 +83,7 @@ pub async fn search(
|
||||
|
||||
let search_result = search_result?;
|
||||
|
||||
debug!("returns: {:?}", search_result);
|
||||
debug!(returns = ?search_result, "Facet search");
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
|
@ -5,7 +5,6 @@ use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::{DeserializeError, Deserr, ValuePointerRef};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
@ -16,6 +15,7 @@ use meilisearch_types::tasks::KindWithContent;
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use time::OffsetDateTime;
|
||||
use tracing::debug;
|
||||
|
||||
use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
|
||||
use crate::analytics::Analytics;
|
||||
@ -93,6 +93,7 @@ pub async fn list_indexes(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
|
||||
paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?paginate, "List indexes");
|
||||
let filters = index_scheduler.filters();
|
||||
let indexes: Vec<Option<IndexView>> =
|
||||
index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
|
||||
@ -105,7 +106,7 @@ pub async fn list_indexes(
|
||||
let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
|
||||
let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());
|
||||
|
||||
debug!("returns: {:?}", ret);
|
||||
debug!(returns = ?ret, "List indexes");
|
||||
Ok(HttpResponse::Ok().json(ret))
|
||||
}
|
||||
|
||||
@ -124,6 +125,7 @@ pub async fn create_index(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?body, "Create index");
|
||||
let IndexCreateRequest { primary_key, uid } = body.into_inner();
|
||||
|
||||
let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
|
||||
@ -137,6 +139,7 @@ pub async fn create_index(
|
||||
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
debug!(returns = ?task, "Create index");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
} else {
|
||||
@ -177,7 +180,7 @@ pub async fn get_index(
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let index_view = IndexView::new(index_uid.into_inner(), &index)?;
|
||||
|
||||
debug!("returns: {:?}", index_view);
|
||||
debug!(returns = ?index_view, "Get index");
|
||||
|
||||
Ok(HttpResponse::Ok().json(index_view))
|
||||
}
|
||||
@ -189,7 +192,7 @@ pub async fn update_index(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", body);
|
||||
debug!(parameters = ?body, "Update index");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let body = body.into_inner();
|
||||
analytics.publish(
|
||||
@ -206,7 +209,7 @@ pub async fn update_index(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Update index");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@ -218,6 +221,7 @@ pub async fn delete_index(
|
||||
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
debug!(returns = ?task, "Delete index");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
@ -255,6 +259,6 @@ pub async fn get_index_stats(
|
||||
|
||||
let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?);
|
||||
|
||||
debug!("returns: {:?}", stats);
|
||||
debug!(returns = ?stats, "Get index stats");
|
||||
Ok(HttpResponse::Ok().json(stats))
|
||||
}
|
||||
|
@ -2,7 +2,6 @@ use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::{debug, warn};
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
@ -12,6 +11,7 @@ use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::vector::DistributionShift;
|
||||
use meilisearch_types::serde_cs::vec::CS;
|
||||
use serde_json::Value;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
use crate::analytics::{Analytics, SearchAggregator};
|
||||
use crate::extractors::authentication::policies::*;
|
||||
@ -186,7 +186,7 @@ pub async fn search_with_url_query(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", params);
|
||||
debug!(parameters = ?params, "Search get");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let mut query: SearchQuery = params.into_inner().into();
|
||||
@ -213,7 +213,7 @@ pub async fn search_with_url_query(
|
||||
|
||||
let search_result = search_result?;
|
||||
|
||||
debug!("returns: {:?}", search_result);
|
||||
debug!(returns = ?search_result, "Search get");
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
@ -227,7 +227,7 @@ pub async fn search_with_post(
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let mut query = params.into_inner();
|
||||
debug!("search called with params: {:?}", query);
|
||||
debug!(parameters = ?query, "Search post");
|
||||
|
||||
// Tenant token search_rules.
|
||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
||||
@ -252,7 +252,7 @@ pub async fn search_with_post(
|
||||
|
||||
let search_result = search_result?;
|
||||
|
||||
debug!("returns: {:?}", search_result);
|
||||
debug!(returns = ?search_result, "Search post");
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
|
@ -2,7 +2,6 @@ use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::facet_values_sort::FacetValuesSort;
|
||||
@ -11,6 +10,7 @@ use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use serde_json::json;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
@ -24,12 +24,12 @@ macro_rules! make_setting_route {
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse, Resource};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::settings::{settings, Settings};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use tracing::debug;
|
||||
use $crate::analytics::Analytics;
|
||||
use $crate::extractors::authentication::policies::*;
|
||||
use $crate::extractors::authentication::GuardedData;
|
||||
@ -61,7 +61,7 @@ macro_rules! make_setting_route {
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Delete settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@ -78,6 +78,7 @@ macro_rules! make_setting_route {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let body = body.into_inner();
|
||||
debug!(parameters = ?body, "Update settings");
|
||||
|
||||
#[allow(clippy::redundant_closure_call)]
|
||||
$analytics(&body, &req);
|
||||
@ -109,7 +110,7 @@ macro_rules! make_setting_route {
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Update settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@ -126,7 +127,7 @@ macro_rules! make_setting_route {
|
||||
let rtxn = index.read_txn()?;
|
||||
let settings = settings(&index, &rtxn)?;
|
||||
|
||||
debug!("returns: {:?}", settings);
|
||||
debug!(returns = ?settings, "Update settings");
|
||||
let mut json = serde_json::json!(&settings);
|
||||
let val = json[$camelcase_attr].take();
|
||||
|
||||
@ -656,6 +657,7 @@ pub async fn update_all(
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let new_settings = body.into_inner();
|
||||
debug!(parameters = ?new_settings, "Update all settings");
|
||||
let new_settings = validate_settings(new_settings, &index_scheduler)?;
|
||||
|
||||
analytics.publish(
|
||||
@ -768,7 +770,7 @@ pub async fn update_all(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Update all settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@ -781,7 +783,7 @@ pub async fn get_all(
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let rtxn = index.read_txn()?;
|
||||
let new_settings = settings(&index, &rtxn)?;
|
||||
debug!("returns: {:?}", new_settings);
|
||||
debug!(returns = ?new_settings, "Get all settings");
|
||||
Ok(HttpResponse::Ok().json(new_settings))
|
||||
}
|
||||
|
||||
@ -804,7 +806,7 @@ pub async fn delete_all(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Delete all settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
|
281
meilisearch/src/routes/logs.rs
Normal file
281
meilisearch/src/routes/logs.rs
Normal file
@ -0,0 +1,281 @@
|
||||
use std::convert::Infallible;
|
||||
use std::io::Write;
|
||||
use std::ops::ControlFlow;
|
||||
use std::pin::Pin;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use actix_web::web::{Bytes, Data};
|
||||
use actix_web::{web, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
|
||||
use futures_util::Stream;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use tokio::sync::mpsc;
|
||||
use tracing_subscriber::filter::Targets;
|
||||
use tracing_subscriber::Layer;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::LogRouteHandle;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
web::resource("stream")
|
||||
.route(web::post().to(SeqHandler(get_logs)))
|
||||
.route(web::delete().to(SeqHandler(cancel_logs))),
|
||||
);
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone, Copy, Deserr, PartialEq, Eq)]
|
||||
#[deserr(rename_all = camelCase)]
|
||||
pub enum LogMode {
|
||||
#[default]
|
||||
Human,
|
||||
Profile,
|
||||
}
|
||||
|
||||
/// Simple wrapper around the `Targets` from `tracing_subscriber` to implement `MergeWithError` on it.
|
||||
#[derive(Clone, Debug)]
|
||||
struct MyTargets(Targets);
|
||||
|
||||
/// Simple wrapper around the `ParseError` from `tracing_subscriber` to implement `MergeWithError` on it.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
enum MyParseError {
|
||||
#[error(transparent)]
|
||||
ParseError(#[from] tracing_subscriber::filter::ParseError),
|
||||
#[error(
|
||||
"Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`"
|
||||
)]
|
||||
Example,
|
||||
}
|
||||
|
||||
impl FromStr for MyTargets {
|
||||
type Err = MyParseError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
if s.is_empty() {
|
||||
Err(MyParseError::Example)
|
||||
} else {
|
||||
Ok(MyTargets(Targets::from_str(s).map_err(MyParseError::ParseError)?))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MergeWithError<MyParseError> for DeserrJsonError<BadRequest> {
|
||||
fn merge(
|
||||
_self_: Option<Self>,
|
||||
other: MyParseError,
|
||||
merge_location: ValuePointerRef,
|
||||
) -> ControlFlow<Self, Self> {
|
||||
Self::error::<Infallible>(
|
||||
None,
|
||||
ErrorKind::Unexpected { msg: other.to_string() },
|
||||
merge_location,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields, validate = validate_get_logs -> DeserrJsonError<InvalidSettingsTypoTolerance>)]
|
||||
pub struct GetLogs {
|
||||
#[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError<BadRequest>)]
|
||||
target: MyTargets,
|
||||
|
||||
#[deserr(default, error = DeserrJsonError<BadRequest>)]
|
||||
mode: LogMode,
|
||||
|
||||
#[deserr(default = false, error = DeserrJsonError<BadRequest>)]
|
||||
profile_memory: bool,
|
||||
}
|
||||
|
||||
fn validate_get_logs<E: DeserializeError>(
|
||||
logs: GetLogs,
|
||||
location: ValuePointerRef,
|
||||
) -> Result<GetLogs, E> {
|
||||
if logs.profile_memory && logs.mode != LogMode::Profile {
|
||||
Err(deserr::take_cf_content(E::error::<Infallible>(
|
||||
None,
|
||||
ErrorKind::Unexpected {
|
||||
msg: format!("`profile_memory` can only be used while profiling code and is not compatible with the {:?} mode.", logs.mode),
|
||||
},
|
||||
location,
|
||||
)))
|
||||
} else {
|
||||
Ok(logs)
|
||||
}
|
||||
}
|
||||
|
||||
struct LogWriter {
|
||||
sender: mpsc::UnboundedSender<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl Write for LogWriter {
|
||||
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
||||
self.sender.send(buf.to_vec()).map_err(std::io::Error::other)?;
|
||||
Ok(buf.len())
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> std::io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
struct HandleGuard {
|
||||
/// We need to keep an handle on the logs to make it available again when the streamer is dropped
|
||||
logs: Arc<LogRouteHandle>,
|
||||
}
|
||||
|
||||
impl Drop for HandleGuard {
|
||||
fn drop(&mut self) {
|
||||
if let Err(e) = self.logs.modify(|layer| *layer.inner_mut() = None) {
|
||||
tracing::error!("Could not free the logs route: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn byte_stream(
|
||||
receiver: mpsc::UnboundedReceiver<Vec<u8>>,
|
||||
guard: HandleGuard,
|
||||
) -> impl futures_util::Stream<Item = Result<Bytes, ResponseError>> {
|
||||
futures_util::stream::unfold((receiver, guard), move |(mut receiver, guard)| async move {
|
||||
let vec = receiver.recv().await;
|
||||
|
||||
vec.map(From::from).map(Ok).map(|a| (a, (receiver, guard)))
|
||||
})
|
||||
}
|
||||
|
||||
type PinnedByteStream = Pin<Box<dyn Stream<Item = Result<Bytes, ResponseError>>>>;
|
||||
|
||||
fn make_layer<
|
||||
S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>,
|
||||
>(
|
||||
opt: &GetLogs,
|
||||
logs: Data<LogRouteHandle>,
|
||||
) -> (Box<dyn Layer<S> + Send + Sync>, PinnedByteStream) {
|
||||
let guard = HandleGuard { logs: logs.into_inner() };
|
||||
match opt.mode {
|
||||
LogMode::Human => {
|
||||
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
|
||||
|
||||
let fmt_layer = tracing_subscriber::fmt::layer()
|
||||
.with_writer(move || LogWriter { sender: sender.clone() })
|
||||
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE);
|
||||
|
||||
let stream = byte_stream(receiver, guard);
|
||||
(Box::new(fmt_layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
|
||||
}
|
||||
LogMode::Profile => {
|
||||
let (trace, layer) = tracing_trace::Trace::new(opt.profile_memory);
|
||||
|
||||
let stream = entry_stream(trace, guard);
|
||||
|
||||
(Box::new(layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn entry_stream(
|
||||
trace: tracing_trace::Trace,
|
||||
guard: HandleGuard,
|
||||
) -> impl Stream<Item = Result<Bytes, ResponseError>> {
|
||||
let receiver = trace.into_receiver();
|
||||
let entry_buf = Vec::new();
|
||||
|
||||
futures_util::stream::unfold(
|
||||
(receiver, entry_buf, guard),
|
||||
move |(mut receiver, mut entry_buf, guard)| async move {
|
||||
let mut bytes = Vec::new();
|
||||
|
||||
while bytes.len() < 8192 {
|
||||
entry_buf.clear();
|
||||
|
||||
let Ok(count) = tokio::time::timeout(
|
||||
std::time::Duration::from_secs(1),
|
||||
receiver.recv_many(&mut entry_buf, 100),
|
||||
)
|
||||
.await
|
||||
else {
|
||||
break;
|
||||
};
|
||||
|
||||
if count == 0 {
|
||||
if !bytes.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
// channel closed, exit
|
||||
return None;
|
||||
}
|
||||
|
||||
for entry in &entry_buf {
|
||||
if let Err(error) = serde_json::to_writer(&mut bytes, entry) {
|
||||
tracing::error!(
|
||||
error = &error as &dyn std::error::Error,
|
||||
"deserializing entry"
|
||||
);
|
||||
return Some((
|
||||
Err(ResponseError::from_msg(
|
||||
format!("error deserializing entry: {error}"),
|
||||
Code::Internal,
|
||||
)),
|
||||
(receiver, entry_buf, guard),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some((Ok(bytes.into()), (receiver, entry_buf, guard)))
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
pub async fn get_logs(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
|
||||
logs: Data<LogRouteHandle>,
|
||||
body: AwebJson<GetLogs, DeserrJsonError>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_logs_route()?;
|
||||
|
||||
let opt = body.into_inner();
|
||||
let mut stream = None;
|
||||
|
||||
logs.modify(|layer| match layer.inner_mut() {
|
||||
None => {
|
||||
// there is no one getting logs
|
||||
*layer.filter_mut() = opt.target.0.clone();
|
||||
let (new_layer, new_stream) = make_layer(&opt, logs.clone());
|
||||
|
||||
*layer.inner_mut() = Some(new_layer);
|
||||
stream = Some(new_stream);
|
||||
}
|
||||
Some(_) => {
|
||||
// there is already someone getting logs
|
||||
}
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
if let Some(stream) = stream {
|
||||
Ok(HttpResponse::Ok().streaming(stream))
|
||||
} else {
|
||||
Err(MeilisearchHttpError::AlreadyUsedLogRoute.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn cancel_logs(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
|
||||
logs: Data<LogRouteHandle>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_logs_route()?;
|
||||
|
||||
if let Err(e) = logs.modify(|layer| *layer.inner_mut() = None) {
|
||||
tracing::error!("Could not free the logs route: {e}");
|
||||
}
|
||||
|
||||
Ok(HttpResponse::NoContent().finish())
|
||||
}
|
@ -3,7 +3,6 @@ use std::collections::BTreeMap;
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::settings::{Settings, Unchecked};
|
||||
@ -11,6 +10,7 @@ use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
use time::OffsetDateTime;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
@ -22,6 +22,7 @@ mod api_key;
|
||||
mod dump;
|
||||
pub mod features;
|
||||
pub mod indexes;
|
||||
mod logs;
|
||||
mod metrics;
|
||||
mod multi_search;
|
||||
mod snapshot;
|
||||
@ -31,6 +32,7 @@ pub mod tasks;
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::scope("/tasks").configure(tasks::configure))
|
||||
.service(web::resource("/health").route(web::get().to(get_health)))
|
||||
.service(web::scope("/logs").configure(logs::configure))
|
||||
.service(web::scope("/keys").configure(api_key::configure))
|
||||
.service(web::scope("/dumps").configure(dump::configure))
|
||||
.service(web::scope("/snapshots").configure(snapshot::configure))
|
||||
@ -250,7 +252,7 @@ async fn get_stats(
|
||||
|
||||
let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;
|
||||
|
||||
debug!("returns: {:?}", stats);
|
||||
debug!(returns = ?stats, "Get stats");
|
||||
Ok(HttpResponse::Ok().json(stats))
|
||||
}
|
||||
|
||||
|
@ -3,11 +3,11 @@ use actix_web::web::{self, Data};
|
||||
use actix_web::{HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::keys::actions;
|
||||
use serde::Serialize;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::{Analytics, MultiSearchAggregator};
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
@ -52,7 +52,7 @@ pub async fn multi_search_with_post(
|
||||
for (query_index, (index_uid, mut query)) in
|
||||
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
|
||||
{
|
||||
debug!("multi-search #{query_index}: called with params: {:?}", query);
|
||||
debug!(on_index = query_index, parameters = ?query, "Multi-search");
|
||||
|
||||
// Check index from API key
|
||||
if !index_scheduler.filters().is_index_authorized(&index_uid) {
|
||||
@ -107,7 +107,7 @@ pub async fn multi_search_with_post(
|
||||
err
|
||||
})?;
|
||||
|
||||
debug!("returns: {:?}", search_results);
|
||||
debug!(returns = ?search_results, "Multi-search");
|
||||
|
||||
Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
|
||||
}
|
||||
|
@ -1,10 +1,10 @@
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use serde_json::json;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
@ -27,6 +27,6 @@ pub async fn create_snapshot(
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
debug!(returns = ?task, "Create snapshot");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
@ -59,6 +59,8 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
|
||||
("POST", "/snapshots") => hashset!{"snapshots.create", "snapshots.*", "*"},
|
||||
("GET", "/version") => hashset!{"version", "*"},
|
||||
("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"},
|
||||
("POST", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
|
||||
("DELETE", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
|
||||
("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"},
|
||||
("GET", "/keys/mykey/") => hashset!{"keys.get", "*"},
|
||||
("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"},
|
||||
|
@ -13,6 +13,8 @@ use meilisearch::{analytics, create_app, setup_meilisearch};
|
||||
use once_cell::sync::Lazy;
|
||||
use tempfile::TempDir;
|
||||
use tokio::time::sleep;
|
||||
use tracing::level_filters::LevelFilter;
|
||||
use tracing_subscriber::Layer;
|
||||
|
||||
use super::index::Index;
|
||||
use super::service::Service;
|
||||
@ -81,10 +83,16 @@ impl Server {
|
||||
Response = ServiceResponse<impl MessageBody>,
|
||||
Error = actix_web::Error,
|
||||
> {
|
||||
let (_route_layer, route_layer_handle) =
|
||||
tracing_subscriber::reload::Layer::new(None.with_filter(
|
||||
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
|
||||
));
|
||||
|
||||
actix_web::test::init_service(create_app(
|
||||
self.service.index_scheduler.clone().into(),
|
||||
self.service.auth.clone().into(),
|
||||
self.service.options.clone(),
|
||||
route_layer_handle,
|
||||
analytics::MockAnalytics::new(&self.service.options),
|
||||
true,
|
||||
))
|
||||
|
@ -7,6 +7,8 @@ use actix_web::test::TestRequest;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch::{analytics, create_app, Opt};
|
||||
use meilisearch_auth::AuthController;
|
||||
use tracing::level_filters::LevelFilter;
|
||||
use tracing_subscriber::Layer;
|
||||
|
||||
use crate::common::encoder::Encoder;
|
||||
use crate::common::Value;
|
||||
@ -105,10 +107,16 @@ impl Service {
|
||||
}
|
||||
|
||||
pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
|
||||
let (_route_layer, route_layer_handle) =
|
||||
tracing_subscriber::reload::Layer::new(None.with_filter(
|
||||
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
|
||||
));
|
||||
|
||||
let app = test::init_service(create_app(
|
||||
self.index_scheduler.clone().into(),
|
||||
self.auth.clone().into(),
|
||||
self.options.clone(),
|
||||
route_layer_handle,
|
||||
analytics::MockAnalytics::new(&self.options),
|
||||
true,
|
||||
))
|
||||
|
@ -1847,6 +1847,7 @@ async fn import_dump_v6_containing_experimental_features() {
|
||||
{
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
@ -20,6 +20,7 @@ async fn experimental_features() {
|
||||
{
|
||||
"vectorStore": false,
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
@ -31,6 +32,7 @@ async fn experimental_features() {
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
@ -42,6 +44,7 @@ async fn experimental_features() {
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
@ -54,6 +57,7 @@ async fn experimental_features() {
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
@ -66,6 +70,7 @@ async fn experimental_features() {
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
@ -85,6 +90,7 @@ async fn experimental_feature_metrics() {
|
||||
{
|
||||
"vectorStore": false,
|
||||
"metrics": true,
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
@ -140,7 +146,7 @@ async fn errors() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `exportPuffinReports`",
|
||||
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
|
@ -5,6 +5,7 @@ mod documents;
|
||||
mod dumps;
|
||||
mod features;
|
||||
mod index;
|
||||
mod logs;
|
||||
mod search;
|
||||
mod settings;
|
||||
mod snapshot;
|
||||
|
182
meilisearch/tests/logs/error.rs
Normal file
182
meilisearch/tests/logs/error.rs
Normal file
@ -0,0 +1,182 @@
|
||||
use meili_snap::*;
|
||||
|
||||
use crate::common::Server;
|
||||
use crate::json;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn logs_stream_bad_target() {
|
||||
let server = Server::new().await;
|
||||
|
||||
// Wrong type
|
||||
let (response, code) = server.service.post("/logs/stream", json!({ "target": true })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.target`: expected a string, but found a boolean: `true`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Wrong type
|
||||
let (response, code) = server.service.post("/logs/stream", json!({ "target": [] })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.target`: expected a string, but found an array: `[]`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Our help message
|
||||
let (response, code) = server.service.post("/logs/stream", json!({ "target": "" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value at `.target`: Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// An error from the target parser
|
||||
let (response, code) = server.service.post("/logs/stream", json!({ "target": "==" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value at `.target`: invalid filter directive: too many '=' in filter directive, expected 0 or 1",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn logs_stream_bad_mode() {
|
||||
let server = Server::new().await;
|
||||
|
||||
// Wrong type
|
||||
let (response, code) = server.service.post("/logs/stream", json!({ "mode": true })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.mode`: expected a string, but found a boolean: `true`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Wrong type
|
||||
let (response, code) = server.service.post("/logs/stream", json!({ "mode": [] })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.mode`: expected a string, but found an array: `[]`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Wrong value
|
||||
let (response, code) = server.service.post("/logs/stream", json!({ "mode": "tamo" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Unknown value `tamo` at `.mode`: expected one of `human`, `profile`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn logs_stream_bad_profile_memory() {
|
||||
let server = Server::new().await;
|
||||
|
||||
// Wrong type
|
||||
let (response, code) =
|
||||
server.service.post("/logs/stream", json!({ "profileMemory": "tamo" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found a string: `\"tamo\"`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Wrong type
|
||||
let (response, code) =
|
||||
server.service.post("/logs/stream", json!({ "profileMemory": ["hello", "kefir"] })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found an array: `[\"hello\",\"kefir\"]`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Used with default parameters
|
||||
let (response, code) =
|
||||
server.service.post("/logs/stream", json!({ "profileMemory": true })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Human mode.",
|
||||
"code": "invalid_settings_typo_tolerance",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance"
|
||||
}
|
||||
"###);
|
||||
|
||||
// Used with an unsupported mode
|
||||
let (response, code) =
|
||||
server.service.post("/logs/stream", json!({ "mode": "fmt", "profileMemory": true })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Unknown value `fmt` at `.mode`: expected one of `human`, `profile`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn logs_stream_without_enabling_the_route() {
|
||||
let server = Server::new().await;
|
||||
|
||||
let (response, code) = server.service.post("/logs/stream", json!({})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = server.service.delete("/logs/stream").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
|
||||
"code": "feature_not_enabled",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||
}
|
||||
"###);
|
||||
}
|
92
meilisearch/tests/logs/mod.rs
Normal file
92
meilisearch/tests/logs/mod.rs
Normal file
@ -0,0 +1,92 @@
|
||||
mod error;
|
||||
|
||||
use std::rc::Rc;
|
||||
use std::str::FromStr;
|
||||
|
||||
use actix_web::http::header::ContentType;
|
||||
use meili_snap::snapshot;
|
||||
use meilisearch::{analytics, create_app, Opt};
|
||||
use tracing::level_filters::LevelFilter;
|
||||
use tracing_subscriber::layer::SubscriberExt;
|
||||
use tracing_subscriber::Layer;
|
||||
|
||||
use crate::common::{default_settings, Server};
|
||||
use crate::json;
|
||||
|
||||
#[actix_web::test]
|
||||
async fn basic_test_log_stream_route() {
|
||||
let db_path = tempfile::tempdir().unwrap();
|
||||
let server = Server::new_with_options(Opt {
|
||||
experimental_enable_logs_route: true,
|
||||
..default_settings(db_path.path())
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let (route_layer, route_layer_handle) =
|
||||
tracing_subscriber::reload::Layer::new(None.with_filter(
|
||||
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
|
||||
));
|
||||
|
||||
let subscriber = tracing_subscriber::registry().with(route_layer).with(
|
||||
tracing_subscriber::fmt::layer()
|
||||
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE)
|
||||
.with_filter(tracing_subscriber::filter::LevelFilter::from_str("INFO").unwrap()),
|
||||
);
|
||||
|
||||
let app = actix_web::test::init_service(create_app(
|
||||
server.service.index_scheduler.clone().into(),
|
||||
server.service.auth.clone().into(),
|
||||
server.service.options.clone(),
|
||||
route_layer_handle,
|
||||
analytics::MockAnalytics::new(&server.service.options),
|
||||
true,
|
||||
))
|
||||
.await;
|
||||
|
||||
// set the subscriber as the default for the application
|
||||
tracing::subscriber::set_global_default(subscriber).unwrap();
|
||||
|
||||
let app = Rc::new(app);
|
||||
|
||||
// First, we start listening on the `/logs/stream` route
|
||||
let handle_app = app.clone();
|
||||
let handle = tokio::task::spawn_local(async move {
|
||||
let req = actix_web::test::TestRequest::post()
|
||||
.uri("/logs/stream")
|
||||
.insert_header(ContentType::json())
|
||||
.set_payload(
|
||||
serde_json::to_vec(&json!({
|
||||
"mode": "human",
|
||||
"target": "info",
|
||||
}))
|
||||
.unwrap(),
|
||||
);
|
||||
let req = req.to_request();
|
||||
let ret = actix_web::test::call_service(&*handle_app, req).await;
|
||||
actix_web::test::read_body(ret).await
|
||||
});
|
||||
|
||||
// We're going to create an index to get at least one info log saying we processed a batch of task
|
||||
let (ret, _code) = server.create_index(json!({ "uid": "tamo" })).await;
|
||||
snapshot!(ret, @r###"
|
||||
{
|
||||
"taskUid": 0,
|
||||
"indexUid": "tamo",
|
||||
"status": "enqueued",
|
||||
"type": "indexCreation",
|
||||
"enqueuedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
server.wait_task(ret.uid()).await;
|
||||
|
||||
let req = actix_web::test::TestRequest::delete().uri("/logs/stream");
|
||||
let req = req.to_request();
|
||||
let ret = actix_web::test::call_service(&*app, req).await;
|
||||
let code = ret.status();
|
||||
snapshot!(code, @"204 No Content");
|
||||
|
||||
let logs = handle.await.unwrap();
|
||||
let logs = String::from_utf8(logs.to_vec()).unwrap();
|
||||
assert!(logs.contains("INFO"), "{logs}");
|
||||
}
|
@ -15,6 +15,7 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false,
|
||||
"exportPuffinReports": false
|
||||
}
|
||||
"###);
|
||||
|
@ -44,10 +44,6 @@ struct WebhookHandle {
|
||||
}
|
||||
|
||||
async fn create_webhook_server() -> WebhookHandle {
|
||||
let mut log_builder = env_logger::Builder::new();
|
||||
log_builder.parse_filters("info");
|
||||
log_builder.init();
|
||||
|
||||
let (sender, receiver) = mpsc::unbounded_channel();
|
||||
let sender = Arc::new(sender);
|
||||
|
||||
|
@ -71,7 +71,6 @@ itertools = "0.11.0"
|
||||
puffin = "0.16.0"
|
||||
|
||||
# logging
|
||||
log = "0.4.20"
|
||||
logging_timer = "1.1.0"
|
||||
csv = "1.3.0"
|
||||
candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
||||
@ -91,6 +90,7 @@ tiktoken-rs = "0.5.8"
|
||||
liquid = "0.26.4"
|
||||
arroy = "0.2.0"
|
||||
rand = "0.8.5"
|
||||
tracing = "0.1.40"
|
||||
|
||||
[dev-dependencies]
|
||||
mimalloc = { version = "0.1.39", default-features = false }
|
||||
@ -102,15 +102,7 @@ meili-snap = { path = "../meili-snap" }
|
||||
rand = { version = "0.8.5", features = ["small_rng"] }
|
||||
|
||||
[features]
|
||||
all-tokenizations = [
|
||||
"charabia/chinese",
|
||||
"charabia/hebrew",
|
||||
"charabia/japanese",
|
||||
"charabia/thai",
|
||||
"charabia/korean",
|
||||
"charabia/greek",
|
||||
"charabia/khmer",
|
||||
]
|
||||
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
|
||||
|
||||
# Use POSIX semaphores instead of SysV semaphores in LMDB
|
||||
# For more information on this feature, see heed's Cargo.toml
|
||||
|
@ -25,6 +25,7 @@ impl<R: io::Read + io::Seek> DocumentsBatchReader<R> {
|
||||
///
|
||||
/// It first retrieves the index, then moves to the first document. Use the `into_cursor`
|
||||
/// method to iterator over the documents, from the first to the last.
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
|
||||
pub fn from_reader(reader: R) -> Result<Self, Error> {
|
||||
let reader = grenad::Reader::new(reader)?;
|
||||
let mut cursor = reader.into_cursor()?;
|
||||
|
@ -6,9 +6,9 @@ use charabia::Normalize;
|
||||
use fst::automaton::{Automaton, Str};
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
|
||||
use log::error;
|
||||
use once_cell::sync::Lazy;
|
||||
use roaring::bitmap::RoaringBitmap;
|
||||
use tracing::error;
|
||||
|
||||
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
|
||||
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
|
||||
|
@ -166,6 +166,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
continue;
|
||||
}
|
||||
|
||||
let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id());
|
||||
let entered = span.enter();
|
||||
|
||||
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
|
||||
ctx,
|
||||
logger,
|
||||
@ -175,6 +178,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
back!();
|
||||
continue;
|
||||
};
|
||||
drop(entered);
|
||||
|
||||
ranking_rule_scores.push(next_bucket.score);
|
||||
|
||||
|
@ -14,6 +14,12 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
|
||||
ClearDocuments { wtxn, index }
|
||||
}
|
||||
|
||||
#[tracing::instrument(
|
||||
level = "trace",
|
||||
skip(self),
|
||||
target = "indexing::documents",
|
||||
name = "clear_documents"
|
||||
)]
|
||||
pub fn execute(self) -> Result<u64> {
|
||||
puffin::profile_function!();
|
||||
|
||||
|
@ -85,8 +85,8 @@ use charabia::normalizer::{Normalize, NormalizerOption};
|
||||
use grenad::{CompressionType, SortAlgorithm};
|
||||
use heed::types::{Bytes, DecodeIgnore, SerdeJson};
|
||||
use heed::BytesEncode;
|
||||
use log::debug;
|
||||
use time::OffsetDateTime;
|
||||
use tracing::debug;
|
||||
|
||||
use self::incremental::FacetsUpdateIncremental;
|
||||
use super::FacetsUpdateBulk;
|
||||
|
@ -22,6 +22,7 @@ use crate::{FieldId, Index, Result};
|
||||
/// # Panics
|
||||
///
|
||||
/// - if reader.is_empty(), this function may panic in some cases
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
|
||||
pub fn enrich_documents_batch<R: Read + Seek>(
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
@ -77,7 +78,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
|
||||
},
|
||||
[] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
|
||||
[(field_id, name)] => {
|
||||
log::info!("Primary key was not specified in index. Inferred to '{name}'");
|
||||
tracing::info!("Primary key was not specified in index. Inferred to '{name}'");
|
||||
PrimaryKey::Flat { name, field_id: *field_id }
|
||||
}
|
||||
multiple => {
|
||||
@ -143,6 +144,8 @@ pub fn enrich_documents_batch<R: Read + Seek>(
|
||||
|
||||
/// Retrieve the document id after validating it, returning a `UserError`
|
||||
/// if the id is invalid or can't be guessed.
|
||||
#[tracing::instrument(level = "trace", skip(uuid_buffer, documents_batch_index, document)
|
||||
target = "indexing::documents")]
|
||||
fn fetch_or_generate_document_id(
|
||||
document: &obkv::KvReader<FieldId>,
|
||||
documents_batch_index: &DocumentsBatchIndex,
|
||||
|
@ -21,7 +21,7 @@ pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, R
|
||||
///
|
||||
/// Returns the generated internal documents ids and a grenad reader
|
||||
/// with the list of extracted words from the given chunk of documents.
|
||||
#[logging_timer::time]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
|
||||
pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
|
@ -16,7 +16,7 @@ use crate::Result;
|
||||
///
|
||||
/// Returns a grenad reader with the list of extracted facet numbers and
|
||||
/// documents ids from the given chunk of docid facet number positions.
|
||||
#[logging_timer::time]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
|
||||
pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
||||
fid_docid_facet_number: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
|
@ -15,7 +15,7 @@ use crate::{FieldId, Result};
|
||||
///
|
||||
/// Returns a grenad reader with the list of extracted facet strings and
|
||||
/// documents ids from the given chunk of docid facet string positions.
|
||||
#[logging_timer::time]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
|
||||
pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
docid_fid_facet_string: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
|
@ -39,7 +39,7 @@ pub struct ExtractedFacetValues {
|
||||
/// Returns the generated grenad reader containing the docid the fid and the orginal value as key
|
||||
/// and the normalized value as value extracted from the given chunk of documents.
|
||||
/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
|
||||
#[logging_timer::time]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
|
||||
pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||
obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
@ -431,7 +431,7 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
|
||||
if let Ok(float) = original.parse() {
|
||||
output_numbers.push(float);
|
||||
} else {
|
||||
log::warn!(
|
||||
tracing::warn!(
|
||||
"Internal error, could not parse a geofield that has been validated. Please open an issue."
|
||||
)
|
||||
}
|
||||
|
@ -19,7 +19,7 @@ const MAX_COUNTED_WORDS: usize = 30;
|
||||
///
|
||||
/// Returns a grenad reader with the list of extracted field id word counts
|
||||
/// and documents ids from the given chunk of docid word positions.
|
||||
#[logging_timer::time]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
|
||||
pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
|
@ -13,7 +13,7 @@ use crate::{FieldId, InternalError, Result};
|
||||
/// Extracts the geographical coordinates contained in each document under the `_geo` field.
|
||||
///
|
||||
/// Returns the generated grenad reader containing the docid as key associated to the (latitude, longitude)
|
||||
#[logging_timer::time]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
|
||||
pub fn extract_geo_points<R: io::Read + io::Seek>(
|
||||
obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
|
@ -67,7 +67,7 @@ impl VectorStateDelta {
|
||||
/// Extracts the embedding vector contained in each document under the `_vectors` field.
|
||||
///
|
||||
/// Returns the generated grenad reader containing the docid as key associated to the Vec<f32>
|
||||
#[logging_timer::time]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
|
||||
pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
@ -186,12 +186,12 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
prompt.render(obkv, DelAdd::Deletion, field_id_map).unwrap_or_default();
|
||||
let new_prompt = prompt.render(obkv, DelAdd::Addition, field_id_map)?;
|
||||
if old_prompt != new_prompt {
|
||||
log::trace!(
|
||||
tracing::trace!(
|
||||
"🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
|
||||
);
|
||||
VectorStateDelta::NowGenerated(new_prompt)
|
||||
} else {
|
||||
log::trace!("⏭️ Prompt unmodified, skipping");
|
||||
tracing::trace!("⏭️ Prompt unmodified, skipping");
|
||||
VectorStateDelta::NoChange
|
||||
}
|
||||
} else {
|
||||
|
@ -23,7 +23,7 @@ use crate::{DocumentId, FieldId, Result};
|
||||
///
|
||||
/// The first returned reader is the one for normal word_docids, and the second one is for
|
||||
/// exact_word_docids
|
||||
#[logging_timer::time]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
|
||||
pub fn extract_word_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
@ -135,6 +135,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
||||
))
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
|
||||
fn words_into_sorter(
|
||||
document_id: DocumentId,
|
||||
fid: FieldId,
|
||||
|
@ -19,7 +19,7 @@ use crate::{DocumentId, Result};
|
||||
///
|
||||
/// Returns a grenad reader with the list of extracted word pairs proximities and
|
||||
/// documents ids from the given chunk of docid word positions.
|
||||
#[logging_timer::time]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
|
||||
pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
@ -59,6 +59,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
||||
if current_document_id.map_or(false, |id| id != document_id) {
|
||||
puffin::profile_scope!("Document into sorter");
|
||||
|
||||
// FIXME: span inside of a hot loop might degrade performance and create big reports
|
||||
let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter");
|
||||
let _entered = span.enter();
|
||||
|
||||
document_word_positions_into_sorter(
|
||||
current_document_id.unwrap(),
|
||||
&del_word_pair_proximity,
|
||||
@ -138,6 +142,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
||||
|
||||
if let Some(document_id) = current_document_id {
|
||||
puffin::profile_scope!("Final document into sorter");
|
||||
// FIXME: span inside of a hot loop might degrade performance and create big reports
|
||||
let span = tracing::trace_span!(target: "indexing::details", "final_document_into_sorter");
|
||||
let _entered = span.enter();
|
||||
|
||||
document_word_positions_into_sorter(
|
||||
document_id,
|
||||
&del_word_pair_proximity,
|
||||
@ -147,6 +155,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
||||
}
|
||||
{
|
||||
puffin::profile_scope!("sorter_into_reader");
|
||||
// FIXME: span inside of a hot loop might degrade performance and create big reports
|
||||
let span = tracing::trace_span!(target: "indexing::details", "sorter_into_reader");
|
||||
let _entered = span.enter();
|
||||
|
||||
let mut writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
@ -18,7 +18,7 @@ use crate::{bucketed_position, DocumentId, Result};
|
||||
///
|
||||
/// Returns a grenad reader with the list of extracted words at positions and
|
||||
/// documents ids from the given chunk of docid word positions.
|
||||
#[logging_timer::time]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
|
||||
pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
@ -94,6 +94,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
||||
Ok(word_position_docids_reader)
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
|
||||
fn words_position_into_sorter(
|
||||
document_id: DocumentId,
|
||||
key_buffer: &mut Vec<u8>,
|
||||
|
@ -14,8 +14,8 @@ use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
|
||||
use crossbeam_channel::Sender;
|
||||
use log::debug;
|
||||
use rayon::prelude::*;
|
||||
use tracing::debug;
|
||||
|
||||
use self::extract_docid_word_positions::extract_docid_word_positions;
|
||||
use self::extract_facet_number_docids::extract_facet_number_docids;
|
||||
@ -41,6 +41,7 @@ use crate::{FieldId, FieldsIdsMap, Result};
|
||||
/// Extract data for each databases from obkv documents in parallel.
|
||||
/// Send data in grenad file over provided Sender.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
|
||||
pub(crate) fn data_from_obkv_documents(
|
||||
original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
|
||||
flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
|
||||
@ -113,7 +114,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
{
|
||||
let lmdb_writer_sx = lmdb_writer_sx.clone();
|
||||
rayon::spawn(move || {
|
||||
debug!("merge {} database", "facet-id-exists-docids");
|
||||
debug!(database = "facet-id-exists-docids", "merge");
|
||||
match facet_exists_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
|
||||
Ok(reader) => {
|
||||
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetExistsDocids(reader)));
|
||||
@ -129,7 +130,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
{
|
||||
let lmdb_writer_sx = lmdb_writer_sx.clone();
|
||||
rayon::spawn(move || {
|
||||
debug!("merge {} database", "facet-id-is-null-docids");
|
||||
debug!(database = "facet-id-is-null-docids", "merge");
|
||||
match facet_is_null_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
|
||||
Ok(reader) => {
|
||||
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader)));
|
||||
@ -145,7 +146,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
{
|
||||
let lmdb_writer_sx = lmdb_writer_sx.clone();
|
||||
rayon::spawn(move || {
|
||||
debug!("merge {} database", "facet-id-is-empty-docids");
|
||||
debug!(database = "facet-id-is-empty-docids", "merge");
|
||||
match facet_is_empty_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
|
||||
Ok(reader) => {
|
||||
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader)));
|
||||
@ -257,13 +258,22 @@ fn spawn_extraction_task<FE, FS, M>(
|
||||
M: MergeableReader + FromParallelIterator<M::Output> + Send + 'static,
|
||||
M::Output: Send,
|
||||
{
|
||||
let current_span = tracing::Span::current();
|
||||
|
||||
rayon::spawn(move || {
|
||||
puffin::profile_scope!("extract_multiple_chunks", name);
|
||||
let child_span =
|
||||
tracing::trace_span!(target: "", parent: ¤t_span, "extract_multiple_chunks");
|
||||
let _entered = child_span.enter();
|
||||
puffin::profile_scope!("extract_multiple_chunksdexing::details, ", name);
|
||||
let chunks: Result<M> =
|
||||
chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect();
|
||||
let current_span = tracing::Span::current();
|
||||
|
||||
rayon::spawn(move || match chunks {
|
||||
Ok(chunks) => {
|
||||
debug!("merge {} database", name);
|
||||
let child_span = tracing::trace_span!(target: "", parent: ¤t_span, "merge_multiple_chunks");
|
||||
let _entered = child_span.enter();
|
||||
debug!(database = name, "merge");
|
||||
puffin::profile_scope!("merge_multiple_chunks", name);
|
||||
let reader = chunks.merge(merge_fn, &indexer);
|
||||
let _ = lmdb_writer_sx.send(reader.map(serialize_fn));
|
||||
|
@ -56,6 +56,7 @@ pub fn create_sorter(
|
||||
builder.build()
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")]
|
||||
pub fn sorter_into_reader(
|
||||
sorter: grenad::Sorter<MergeFn>,
|
||||
indexer: GrenadParameters,
|
||||
@ -252,6 +253,7 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
|
||||
|
||||
/// Write provided sorter in database using serialize_value function.
|
||||
/// merge_values function is used if an entry already exist in the database.
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")]
|
||||
pub fn write_sorter_into_database<K, V, FS, FM>(
|
||||
sorter: Sorter<MergeFn>,
|
||||
database: &heed::Database<K, V>,
|
||||
|
@ -13,11 +13,11 @@ use std::result::Result as StdResult;
|
||||
use crossbeam_channel::{Receiver, Sender};
|
||||
use heed::types::Str;
|
||||
use heed::Database;
|
||||
use log::debug;
|
||||
use rand::SeedableRng;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use slice_group_by::GroupBy;
|
||||
use tracing::debug_span;
|
||||
use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
|
||||
|
||||
use self::enrich::enrich_documents_batch;
|
||||
@ -134,6 +134,7 @@ where
|
||||
/// return an error and not the `IndexDocuments` struct as it is invalid to use it afterward.
|
||||
///
|
||||
/// Returns the number of documents added to the builder.
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
|
||||
pub fn add_documents<R: Read + Seek>(
|
||||
mut self,
|
||||
reader: DocumentsBatchReader<R>,
|
||||
@ -179,6 +180,7 @@ where
|
||||
/// Remove a batch of documents from the current builder.
|
||||
///
|
||||
/// Returns the number of documents deleted from the builder.
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
|
||||
pub fn remove_documents(
|
||||
mut self,
|
||||
to_delete: Vec<String>,
|
||||
@ -214,6 +216,7 @@ where
|
||||
/// - No batching using the standards `remove_documents` and `add_documents` took place
|
||||
///
|
||||
/// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function.
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::details")]
|
||||
pub fn remove_documents_from_db_no_batch(
|
||||
mut self,
|
||||
to_delete: &RoaringBitmap,
|
||||
@ -237,7 +240,12 @@ where
|
||||
Ok((self, deleted_documents))
|
||||
}
|
||||
|
||||
#[logging_timer::time("IndexDocuments::{}")]
|
||||
#[tracing::instrument(
|
||||
level = "trace"
|
||||
skip_all,
|
||||
target = "indexing::documents",
|
||||
name = "index_documents"
|
||||
)]
|
||||
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
|
||||
puffin::profile_function!();
|
||||
|
||||
@ -273,7 +281,12 @@ where
|
||||
}
|
||||
|
||||
/// Returns the total number of documents in the index after the update.
|
||||
#[logging_timer::time("IndexDocuments::{}")]
|
||||
#[tracing::instrument(
|
||||
level = "trace",
|
||||
skip_all,
|
||||
target = "profile::indexing::details",
|
||||
name = "index_documents_raw"
|
||||
)]
|
||||
pub fn execute_raw(self, output: TransformOutput) -> Result<u64>
|
||||
where
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
@ -374,8 +387,12 @@ where
|
||||
|
||||
let cloned_embedder = self.embedders.clone();
|
||||
|
||||
let current_span = tracing::Span::current();
|
||||
|
||||
// Run extraction pipeline in parallel.
|
||||
pool.install(|| {
|
||||
let child_span = tracing::trace_span!(target: "indexing::details", parent: ¤t_span, "extract_and_send_grenad_chunks");
|
||||
let _enter = child_span.enter();
|
||||
puffin::profile_scope!("extract_and_send_grenad_chunks");
|
||||
// split obkv file into several chunks
|
||||
let original_chunk_iter =
|
||||
@ -489,10 +506,7 @@ where
|
||||
documents_seen: documents_seen_count as usize,
|
||||
total_documents: documents_count,
|
||||
});
|
||||
debug!(
|
||||
"We have seen {} documents on {} total document so far",
|
||||
documents_seen_count, documents_count
|
||||
);
|
||||
debug_span!("Seen", documents = documents_seen_count, total = documents_count);
|
||||
}
|
||||
if is_merged_database {
|
||||
databases_seen += 1;
|
||||
@ -543,7 +557,12 @@ where
|
||||
Ok(number_of_documents)
|
||||
}
|
||||
|
||||
#[logging_timer::time("IndexDocuments::{}")]
|
||||
#[tracing::instrument(
|
||||
level = "trace",
|
||||
skip_all,
|
||||
target = "indexing::prefix",
|
||||
name = "index_documents_prefix_databases"
|
||||
)]
|
||||
pub fn execute_prefix_databases(
|
||||
self,
|
||||
word_docids: Option<grenad::Reader<CursorClonableMmap>>,
|
||||
@ -598,6 +617,8 @@ where
|
||||
let del_prefix_fst_words;
|
||||
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::details", "compute_prefix_diffs");
|
||||
let _entered = span.enter();
|
||||
puffin::profile_scope!("compute_prefix_diffs");
|
||||
|
||||
current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
|
||||
@ -722,6 +743,12 @@ where
|
||||
|
||||
/// Run the word prefix docids update operation.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[tracing::instrument(
|
||||
level = "trace",
|
||||
skip_all,
|
||||
target = "indexing::prefix",
|
||||
name = "index_documents_word_prefix_docids"
|
||||
)]
|
||||
fn execute_word_prefix_docids(
|
||||
txn: &mut heed::RwTxn,
|
||||
reader: grenad::Reader<Cursor<ClonableMmap>>,
|
||||
|
@ -146,7 +146,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
})
|
||||
}
|
||||
|
||||
#[logging_timer::time]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
|
||||
pub fn read_documents<R, FP, FA>(
|
||||
&mut self,
|
||||
reader: EnrichedDocumentsBatchReader<R>,
|
||||
@ -359,7 +359,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
/// - If the document to remove was inserted by the `read_documents` method before but was NOT present in the db,
|
||||
/// it's added into the grenad to ensure we don't insert it + removed from the list of new documents ids.
|
||||
/// - If the document to remove was not present in either the db or the transform we do nothing.
|
||||
#[logging_timer::time]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
|
||||
pub fn remove_documents<FA>(
|
||||
&mut self,
|
||||
mut to_remove: Vec<String>,
|
||||
@ -450,7 +450,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
/// - No batching using the standards `remove_documents` and `add_documents` took place
|
||||
///
|
||||
/// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function.
|
||||
#[logging_timer::time]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::details")]
|
||||
pub fn remove_documents_from_db_no_batch<FA>(
|
||||
&mut self,
|
||||
to_remove: &RoaringBitmap,
|
||||
@ -541,6 +541,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
|
||||
// Flatten a document from the fields ids map contained in self and insert the new
|
||||
// created fields. Returns `None` if the document doesn't need to be flattened.
|
||||
#[tracing::instrument(level = "trace", skip(self, obkv), target = "indexing::transform")]
|
||||
fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Option<Vec<u8>>> {
|
||||
if obkv
|
||||
.iter()
|
||||
@ -661,7 +662,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
/// Generate the `TransformOutput` based on the given sorter that can be generated from any
|
||||
/// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document
|
||||
/// id for the user side and the value must be an obkv where keys are valid fields ids.
|
||||
#[logging_timer::time]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::transform")]
|
||||
pub(crate) fn output_from_sorter<F>(
|
||||
self,
|
||||
wtxn: &mut heed::RwTxn,
|
||||
|
@ -115,6 +115,7 @@ impl TypedChunk {
|
||||
|
||||
/// Write typed chunk in the corresponding LMDB database of the provided index.
|
||||
/// Return new documents seen.
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
|
||||
pub(crate) fn write_typed_chunk_into_index(
|
||||
typed_chunk: TypedChunk,
|
||||
index: &Index,
|
||||
@ -126,6 +127,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
let mut is_merged_database = false;
|
||||
match typed_chunk {
|
||||
TypedChunk::Documents(obkv_documents_iter) => {
|
||||
let span = tracing::trace_span!(target: "indexing::write_db", "documents");
|
||||
let _entered = span.enter();
|
||||
let mut operations: Vec<DocumentOperation> = Default::default();
|
||||
|
||||
let mut docids = index.documents_ids(wtxn)?;
|
||||
@ -172,6 +175,9 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
index.put_documents_ids(wtxn, &docids)?;
|
||||
}
|
||||
TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => {
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::write_db", "field_id_word_count_docids");
|
||||
let _entered = span.enter();
|
||||
append_entries_into_database(
|
||||
fid_word_count_docids_iter,
|
||||
&index.field_id_word_count_docids,
|
||||
@ -187,6 +193,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
exact_word_docids_reader,
|
||||
word_fid_docids_reader,
|
||||
} => {
|
||||
let span = tracing::trace_span!(target: "indexing::write_db", "word_docids");
|
||||
let _entered = span.enter();
|
||||
let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_reader) }?;
|
||||
append_entries_into_database(
|
||||
word_docids_iter.clone(),
|
||||
@ -230,6 +238,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
is_merged_database = true;
|
||||
}
|
||||
TypedChunk::WordPositionDocids(word_position_docids_iter) => {
|
||||
let span = tracing::trace_span!(target: "indexing::write_db", "word_position_docids");
|
||||
let _entered = span.enter();
|
||||
append_entries_into_database(
|
||||
word_position_docids_iter,
|
||||
&index.word_position_docids,
|
||||
@ -241,16 +251,25 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
is_merged_database = true;
|
||||
}
|
||||
TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => {
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::write_db","field_id_facet_number_docids");
|
||||
let _entered = span.enter();
|
||||
let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter);
|
||||
indexer.execute(wtxn)?;
|
||||
is_merged_database = true;
|
||||
}
|
||||
TypedChunk::FieldIdFacetStringDocids(facet_id_string_docids_iter) => {
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_string_docids");
|
||||
let _entered = span.enter();
|
||||
let indexer = FacetsUpdate::new(index, FacetType::String, facet_id_string_docids_iter);
|
||||
indexer.execute(wtxn)?;
|
||||
is_merged_database = true;
|
||||
}
|
||||
TypedChunk::FieldIdFacetExistsDocids(facet_id_exists_docids) => {
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_exists_docids");
|
||||
let _entered = span.enter();
|
||||
append_entries_into_database(
|
||||
facet_id_exists_docids,
|
||||
&index.facet_id_exists_docids,
|
||||
@ -262,6 +281,9 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
is_merged_database = true;
|
||||
}
|
||||
TypedChunk::FieldIdFacetIsNullDocids(facet_id_is_null_docids) => {
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_null_docids");
|
||||
let _entered = span.enter();
|
||||
append_entries_into_database(
|
||||
facet_id_is_null_docids,
|
||||
&index.facet_id_is_null_docids,
|
||||
@ -273,6 +295,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
is_merged_database = true;
|
||||
}
|
||||
TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => {
|
||||
let span = tracing::trace_span!(target: "profile::indexing::write_db", "field_id_facet_is_empty_docids");
|
||||
let _entered = span.enter();
|
||||
append_entries_into_database(
|
||||
facet_id_is_empty_docids,
|
||||
&index.facet_id_is_empty_docids,
|
||||
@ -284,6 +308,9 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
is_merged_database = true;
|
||||
}
|
||||
TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => {
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::write_db", "word_pair_proximity_docids");
|
||||
let _entered = span.enter();
|
||||
append_entries_into_database(
|
||||
word_pair_proximity_docids_iter,
|
||||
&index.word_pair_proximity_docids,
|
||||
@ -295,6 +322,9 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
is_merged_database = true;
|
||||
}
|
||||
TypedChunk::FieldIdDocidFacetNumbers(fid_docid_facet_number) => {
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_numbers");
|
||||
let _entered = span.enter();
|
||||
let index_fid_docid_facet_numbers =
|
||||
index.field_id_docid_facet_f64s.remap_types::<Bytes, Bytes>();
|
||||
let mut cursor = fid_docid_facet_number.into_cursor()?;
|
||||
@ -315,6 +345,9 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
}
|
||||
}
|
||||
TypedChunk::FieldIdDocidFacetStrings(fid_docid_facet_string) => {
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_strings");
|
||||
let _entered = span.enter();
|
||||
let index_fid_docid_facet_strings =
|
||||
index.field_id_docid_facet_strings.remap_types::<Bytes, Bytes>();
|
||||
let mut cursor = fid_docid_facet_string.into_cursor()?;
|
||||
@ -335,6 +368,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
}
|
||||
}
|
||||
TypedChunk::GeoPoints(geo_points) => {
|
||||
let span = tracing::trace_span!(target: "indexing::write_db", "geo_points");
|
||||
let _entered = span.enter();
|
||||
let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
|
||||
let mut geo_faceted_docids = index.geo_faceted_documents_ids(wtxn)?;
|
||||
|
||||
@ -365,6 +400,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
expected_dimension,
|
||||
embedder_name,
|
||||
} => {
|
||||
let span = tracing::trace_span!(target: "indexing::write_db", "vector_points");
|
||||
let _entered = span.enter();
|
||||
let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
|
||||
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
|
||||
)?;
|
||||
@ -480,9 +517,11 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
}
|
||||
}
|
||||
|
||||
log::debug!("Finished vector chunk for {}", embedder_name);
|
||||
tracing::debug!("Finished vector chunk for {}", embedder_name);
|
||||
}
|
||||
TypedChunk::ScriptLanguageDocids(sl_map) => {
|
||||
let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids");
|
||||
let _entered = span.enter();
|
||||
for (key, (deletion, addition)) in sl_map {
|
||||
let mut db_key_exists = false;
|
||||
let final_value = match index.script_language_docids.get(wtxn, &key)? {
|
||||
@ -536,6 +575,7 @@ fn merge_word_docids_reader_into_fst(
|
||||
|
||||
/// Write provided entries in database using serialize_value function.
|
||||
/// merge_values function is used if an entry already exist in the database.
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
|
||||
fn write_entries_into_database<R, K, V, FS, FM>(
|
||||
data: grenad::Reader<R>,
|
||||
database: &heed::Database<K, V>,
|
||||
@ -582,6 +622,7 @@ where
|
||||
/// merge_values function is used if an entry already exist in the database.
|
||||
/// All provided entries must be ordered.
|
||||
/// If the index is not empty, write_entries_into_database is called instead.
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
|
||||
fn append_entries_into_database<R, K, V, FS, FM>(
|
||||
data: grenad::Reader<R>,
|
||||
database: &heed::Database<K, V>,
|
||||
|
@ -372,6 +372,11 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
self.embedder_settings = Setting::Reset;
|
||||
}
|
||||
|
||||
#[tracing::instrument(
|
||||
level = "trace"
|
||||
skip(self, progress_callback, should_abort, old_fields_ids_map),
|
||||
target = "indexing::documents"
|
||||
)]
|
||||
fn reindex<FP, FA>(
|
||||
&mut self,
|
||||
progress_callback: &FP,
|
||||
|
@ -39,7 +39,12 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
|
||||
}
|
||||
}
|
||||
|
||||
#[logging_timer::time("WordPrefixDocids::{}")]
|
||||
#[tracing::instrument(
|
||||
level = "trace",
|
||||
skip_all,
|
||||
target = "indexing::prefix",
|
||||
name = "word_prefix_docids"
|
||||
)]
|
||||
pub fn execute(
|
||||
self,
|
||||
mut new_word_docids_iter: grenad::ReaderCursor<CursorClonableMmap>,
|
||||
|
@ -4,7 +4,7 @@ use std::str;
|
||||
use grenad::CompressionType;
|
||||
use heed::types::Bytes;
|
||||
use heed::{BytesDecode, BytesEncode, Database};
|
||||
use log::debug;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::error::SerializationError;
|
||||
use crate::heed_codec::StrBEU16Codec;
|
||||
@ -44,7 +44,12 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
||||
}
|
||||
}
|
||||
|
||||
#[logging_timer::time("WordPrefixIntegerDocids::{}")]
|
||||
#[tracing::instrument(
|
||||
level = "trace",
|
||||
skip_all,
|
||||
target = "indexing::prefix",
|
||||
name = "words_prefix_integer_docids"
|
||||
)]
|
||||
pub fn execute(
|
||||
self,
|
||||
new_word_integer_docids: grenad::Reader<CursorClonableMmap>,
|
||||
|
@ -38,7 +38,12 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
|
||||
self
|
||||
}
|
||||
|
||||
#[logging_timer::time("WordsPrefixesFst::{}")]
|
||||
#[tracing::instrument(
|
||||
level = "trace",
|
||||
skip_all,
|
||||
target = "indexing::prefix",
|
||||
name = "words_prefix_fst"
|
||||
)]
|
||||
pub fn execute(self) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
|
@ -73,7 +73,7 @@ impl Embedder {
|
||||
let device = match candle_core::Device::cuda_if_available(0) {
|
||||
Ok(device) => device,
|
||||
Err(error) => {
|
||||
log::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error);
|
||||
tracing::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error);
|
||||
candle_core::Device::Cpu
|
||||
}
|
||||
};
|
||||
|
@ -173,12 +173,16 @@ impl Embedder {
|
||||
let retry_duration = match result {
|
||||
Ok(embeddings) => return Ok(embeddings),
|
||||
Err(retry) => {
|
||||
log::warn!("Failed: {}", retry.error);
|
||||
tracing::warn!("Failed: {}", retry.error);
|
||||
tokenized |= retry.must_tokenize();
|
||||
retry.into_duration(attempt)
|
||||
}
|
||||
}?;
|
||||
log::warn!("Attempt #{}, retrying after {}ms.", attempt, retry_duration.as_millis());
|
||||
tracing::warn!(
|
||||
"Attempt #{}, retrying after {}ms.",
|
||||
attempt,
|
||||
retry_duration.as_millis()
|
||||
);
|
||||
tokio::time::sleep(retry_duration).await;
|
||||
}
|
||||
|
||||
@ -244,7 +248,7 @@ impl Embedder {
|
||||
.map_err(EmbedError::openai_unexpected)
|
||||
.map_err(Retry::retry_later)?;
|
||||
|
||||
log::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
|
||||
tracing::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
|
||||
|
||||
return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens(
|
||||
error_response.error,
|
||||
@ -266,7 +270,7 @@ impl Embedder {
|
||||
client: &reqwest::Client,
|
||||
) -> Result<Vec<Embeddings<f32>>, Retry> {
|
||||
for text in texts {
|
||||
log::trace!("Received prompt: {}", text.as_ref())
|
||||
tracing::trace!("Received prompt: {}", text.as_ref())
|
||||
}
|
||||
let request = OpenAiRequest {
|
||||
model: self.options.embedding_model.name(),
|
||||
@ -289,7 +293,7 @@ impl Embedder {
|
||||
.map_err(EmbedError::openai_unexpected)
|
||||
.map_err(Retry::retry_later)?;
|
||||
|
||||
log::trace!("response: {:?}", response.data);
|
||||
tracing::trace!("response: {:?}", response.data);
|
||||
|
||||
Ok(response
|
||||
.data
|
||||
|
23
tracing-trace/Cargo.toml
Normal file
23
tracing-trace/Cargo.toml
Normal file
@ -0,0 +1,23 @@
|
||||
[package]
|
||||
name = "tracing-trace"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
color-spantrace = "0.2.1"
|
||||
fxprof-processed-profile = "0.6.0"
|
||||
serde = { version = "1.0.195", features = ["derive"] }
|
||||
serde_json = "1.0.111"
|
||||
tracing = "0.1.40"
|
||||
tracing-error = "0.2.0"
|
||||
tracing-subscriber = "0.3.18"
|
||||
byte-unit = { version = "4.0.19", default-features = false, features = [
|
||||
"std",
|
||||
"serde",
|
||||
] }
|
||||
tokio = { version = "1.35.1", features = ["sync"] }
|
||||
|
||||
[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies]
|
||||
libproc = "0.14.2"
|
21
tracing-trace/src/bin/trace-to-callstats.rs
Normal file
21
tracing-trace/src/bin/trace-to-callstats.rs
Normal file
@ -0,0 +1,21 @@
|
||||
use std::ffi::OsString;
|
||||
use std::io::Write;
|
||||
|
||||
use serde_json::json;
|
||||
|
||||
fn main() {
|
||||
let input_file = std::env::args_os().nth(1).expect("missing <INPUT> file");
|
||||
let input =
|
||||
std::io::BufReader::new(std::fs::File::open(&input_file).expect("could not open <INPUT>"));
|
||||
let trace = tracing_trace::TraceReader::new(input);
|
||||
let profile = tracing_trace::processor::span_stats::to_call_stats(trace).unwrap();
|
||||
let mut output_file = OsString::new();
|
||||
output_file.push("callstats-");
|
||||
output_file.push(input_file);
|
||||
let mut output_file = std::io::BufWriter::new(std::fs::File::create(output_file).unwrap());
|
||||
for (key, value) in profile {
|
||||
serde_json::to_writer(&mut output_file, &json!({key: value})).unwrap();
|
||||
writeln!(&mut output_file).unwrap();
|
||||
}
|
||||
output_file.flush().unwrap();
|
||||
}
|
18
tracing-trace/src/bin/trace-to-firefox.rs
Normal file
18
tracing-trace/src/bin/trace-to-firefox.rs
Normal file
@ -0,0 +1,18 @@
|
||||
use std::ffi::OsString;
|
||||
use std::io::Write;
|
||||
|
||||
fn main() {
|
||||
let input_file = std::env::args_os().nth(1).expect("missing <INPUT> file");
|
||||
let input =
|
||||
std::io::BufReader::new(std::fs::File::open(&input_file).expect("could not open <INPUT>"));
|
||||
let trace = tracing_trace::TraceReader::new(input);
|
||||
let profile =
|
||||
tracing_trace::processor::firefox_profiler::to_firefox_profile(trace, "Meilisearch")
|
||||
.unwrap();
|
||||
let mut output_file = OsString::new();
|
||||
output_file.push("firefox-");
|
||||
output_file.push(input_file);
|
||||
let mut output_file = std::io::BufWriter::new(std::fs::File::create(output_file).unwrap());
|
||||
serde_json::to_writer(&mut output_file, &profile).unwrap();
|
||||
output_file.flush().unwrap();
|
||||
}
|
141
tracing-trace/src/entry.rs
Normal file
141
tracing-trace/src/entry.rs
Normal file
@ -0,0 +1,141 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::span::Id as TracingId;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub enum Entry {
|
||||
/// A code location was accessed for the first time
|
||||
NewCallsite(NewCallsite),
|
||||
|
||||
/// A new thread was accessed
|
||||
NewThread(NewThread),
|
||||
|
||||
/// A new call started
|
||||
NewSpan(NewSpan),
|
||||
|
||||
/// An already in-flight call started doing work.
|
||||
///
|
||||
/// For synchronous functions, open should always be followed immediately by enter, exit and close,
|
||||
/// but for asynchronous functions, work can suspend (exiting the span without closing it), and then
|
||||
/// later resume (entering the span again without opening it).
|
||||
///
|
||||
/// The timer for a span only starts when the span is entered.
|
||||
SpanEnter(SpanEnter),
|
||||
|
||||
/// An in-flight call suspended and paused work.
|
||||
///
|
||||
/// For synchronous functions, exit should always be followed immediately by close,
|
||||
/// but for asynchronous functions, work can suspend and then later resume.
|
||||
///
|
||||
/// The timer for a span pauses when the span is exited.
|
||||
SpanExit(SpanExit),
|
||||
|
||||
/// A call ended
|
||||
SpanClose(SpanClose),
|
||||
|
||||
/// An event occurred
|
||||
Event(Event),
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||
pub struct SpanId(u64);
|
||||
|
||||
impl From<&TracingId> for SpanId {
|
||||
fn from(value: &TracingId) -> Self {
|
||||
Self(value.into_u64())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct NewCallsite {
|
||||
pub call_id: ResourceId,
|
||||
pub name: Cow<'static, str>,
|
||||
pub module_path: Option<Cow<'static, str>>,
|
||||
pub file: Option<Cow<'static, str>>,
|
||||
pub line: Option<u32>,
|
||||
pub target: Cow<'static, str>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct NewThread {
|
||||
pub thread_id: ResourceId,
|
||||
pub name: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
|
||||
pub struct SpanEnter {
|
||||
pub id: SpanId,
|
||||
pub time: std::time::Duration,
|
||||
pub memory: Option<MemoryStats>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
|
||||
pub struct SpanExit {
|
||||
pub id: SpanId,
|
||||
pub time: std::time::Duration,
|
||||
pub memory: Option<MemoryStats>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
|
||||
pub struct Event {
|
||||
pub call_id: ResourceId,
|
||||
pub thread_id: ResourceId,
|
||||
pub parent_id: Option<SpanId>,
|
||||
pub time: std::time::Duration,
|
||||
pub memory: Option<MemoryStats>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
|
||||
pub struct NewSpan {
|
||||
pub id: SpanId,
|
||||
pub call_id: ResourceId,
|
||||
pub parent_id: Option<SpanId>,
|
||||
pub thread_id: ResourceId,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
|
||||
pub struct SpanClose {
|
||||
pub id: SpanId,
|
||||
pub time: std::time::Duration,
|
||||
}
|
||||
|
||||
/// A struct with a memory allocation stat.
|
||||
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
|
||||
pub struct MemoryStats {
|
||||
/// Resident set size, measured in bytes.
|
||||
/// (same as VmRSS in /proc/<pid>/status).
|
||||
pub resident: u64,
|
||||
}
|
||||
|
||||
impl MemoryStats {
|
||||
#[cfg(any(target_os = "linux", target_os = "macos"))]
|
||||
pub fn fetch() -> Option<Self> {
|
||||
use libproc::libproc::pid_rusage::{pidrusage, RUsageInfoV0};
|
||||
|
||||
match pidrusage(std::process::id() as i32) {
|
||||
Ok(RUsageInfoV0 { ri_resident_size, .. }) => {
|
||||
Some(MemoryStats { resident: ri_resident_size })
|
||||
}
|
||||
Err(_) => None, /* ignoring error to avoid spamming */
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
|
||||
pub fn fetch() -> Option<Self> {
|
||||
None
|
||||
}
|
||||
|
||||
pub fn checked_sub(self, other: Self) -> Option<Self> {
|
||||
Some(Self { resident: self.resident.checked_sub(other.resident)? })
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||
pub struct ResourceId(pub(crate) usize);
|
||||
|
||||
impl ResourceId {
|
||||
pub fn to_usize(self) -> usize {
|
||||
self.0
|
||||
}
|
||||
}
|
21
tracing-trace/src/error.rs
Normal file
21
tracing-trace/src/error.rs
Normal file
@ -0,0 +1,21 @@
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
Json(serde_json::Error),
|
||||
}
|
||||
|
||||
impl std::error::Error for Error {}
|
||||
|
||||
impl std::fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str("error de/serializing trace entry:")?;
|
||||
match self {
|
||||
Error::Json(error) => std::fmt::Display::fmt(&error, f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::Error> for Error {
|
||||
fn from(value: serde_json::Error) -> Self {
|
||||
Self::Json(value)
|
||||
}
|
||||
}
|
208
tracing-trace/src/layer.rs
Normal file
208
tracing-trace/src/layer.rs
Normal file
@ -0,0 +1,208 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::io::Write;
|
||||
use std::ops::ControlFlow;
|
||||
use std::sync::RwLock;
|
||||
|
||||
use tracing::span::{Attributes, Id as TracingId};
|
||||
use tracing::{Metadata, Subscriber};
|
||||
use tracing_subscriber::layer::Context;
|
||||
use tracing_subscriber::Layer;
|
||||
|
||||
use crate::entry::{
|
||||
Entry, Event, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter,
|
||||
SpanExit, SpanId,
|
||||
};
|
||||
use crate::{Error, Trace, TraceWriter};
|
||||
|
||||
/// Layer that measures the time spent in spans.
|
||||
pub struct TraceLayer {
|
||||
sender: tokio::sync::mpsc::UnboundedSender<Entry>,
|
||||
callsites: RwLock<HashMap<OpaqueIdentifier, ResourceId>>,
|
||||
start_time: std::time::Instant,
|
||||
profile_memory: bool,
|
||||
}
|
||||
|
||||
impl Trace {
|
||||
pub fn new(profile_memory: bool) -> (Self, TraceLayer) {
|
||||
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
|
||||
let trace = Trace { receiver };
|
||||
let layer = TraceLayer {
|
||||
sender,
|
||||
callsites: Default::default(),
|
||||
start_time: std::time::Instant::now(),
|
||||
profile_memory,
|
||||
};
|
||||
(trace, layer)
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Write> TraceWriter<W> {
|
||||
pub fn new(writer: W, profile_memory: bool) -> (Self, TraceLayer) {
|
||||
let (trace, layer) = Trace::new(profile_memory);
|
||||
(trace.into_writer(writer), layer)
|
||||
}
|
||||
|
||||
pub async fn receive(&mut self) -> Result<ControlFlow<(), ()>, Error> {
|
||||
let Some(entry) = self.receiver.recv().await else {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
};
|
||||
self.write(entry)?;
|
||||
Ok(ControlFlow::Continue(()))
|
||||
}
|
||||
|
||||
/// Panics if called from an asynchronous context
|
||||
pub fn blocking_receive(&mut self) -> Result<ControlFlow<(), ()>, Error> {
|
||||
let Some(entry) = self.receiver.blocking_recv() else {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
};
|
||||
self.write(entry)?;
|
||||
Ok(ControlFlow::Continue(()))
|
||||
}
|
||||
|
||||
pub fn write(&mut self, entry: Entry) -> Result<(), Error> {
|
||||
Ok(serde_json::ser::to_writer(&mut self.writer, &entry)?)
|
||||
}
|
||||
|
||||
pub fn try_receive(&mut self) -> Result<ControlFlow<(), ()>, Error> {
|
||||
let Ok(entry) = self.receiver.try_recv() else {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
};
|
||||
self.write(entry)?;
|
||||
Ok(ControlFlow::Continue(()))
|
||||
}
|
||||
|
||||
pub fn flush(&mut self) -> Result<(), std::io::Error> {
|
||||
self.writer.flush()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Hash)]
|
||||
enum OpaqueIdentifier {
|
||||
Thread(std::thread::ThreadId),
|
||||
Call(tracing::callsite::Identifier),
|
||||
}
|
||||
|
||||
impl TraceLayer {
|
||||
fn resource_id(&self, opaque: OpaqueIdentifier) -> Option<ResourceId> {
|
||||
self.callsites.read().unwrap().get(&opaque).copied()
|
||||
}
|
||||
|
||||
fn register_resource_id(&self, opaque: OpaqueIdentifier) -> ResourceId {
|
||||
let mut map = self.callsites.write().unwrap();
|
||||
let len = map.len();
|
||||
*map.entry(opaque).or_insert(ResourceId(len))
|
||||
}
|
||||
|
||||
fn elapsed(&self) -> std::time::Duration {
|
||||
self.start_time.elapsed()
|
||||
}
|
||||
|
||||
fn memory_stats(&self) -> Option<MemoryStats> {
|
||||
if self.profile_memory {
|
||||
MemoryStats::fetch()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn send(&self, entry: Entry) {
|
||||
// we never care that the other end hanged on us
|
||||
let _ = self.sender.send(entry);
|
||||
}
|
||||
|
||||
fn register_callsite(&self, metadata: &'static Metadata<'static>) -> ResourceId {
|
||||
let call_id = self.register_resource_id(OpaqueIdentifier::Call(metadata.callsite()));
|
||||
|
||||
let module_path = metadata.module_path();
|
||||
let file = metadata.file();
|
||||
let line = metadata.line();
|
||||
let name = metadata.name();
|
||||
let target = metadata.target();
|
||||
|
||||
self.send(Entry::NewCallsite(NewCallsite {
|
||||
call_id,
|
||||
module_path: module_path.map(Cow::Borrowed),
|
||||
file: file.map(Cow::Borrowed),
|
||||
line,
|
||||
name: Cow::Borrowed(name),
|
||||
target: Cow::Borrowed(target),
|
||||
}));
|
||||
call_id
|
||||
}
|
||||
|
||||
fn register_thread(&self) -> ResourceId {
|
||||
let thread_id = std::thread::current().id();
|
||||
let name = std::thread::current().name().map(ToOwned::to_owned);
|
||||
let thread_id = self.register_resource_id(OpaqueIdentifier::Thread(thread_id));
|
||||
self.send(Entry::NewThread(NewThread { thread_id, name }));
|
||||
thread_id
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> Layer<S> for TraceLayer
|
||||
where
|
||||
S: Subscriber,
|
||||
{
|
||||
fn on_new_span(&self, attrs: &Attributes<'_>, id: &TracingId, _ctx: Context<'_, S>) {
|
||||
let call_id = self
|
||||
.resource_id(OpaqueIdentifier::Call(attrs.metadata().callsite()))
|
||||
.unwrap_or_else(|| self.register_callsite(attrs.metadata()));
|
||||
|
||||
let thread_id = self
|
||||
.resource_id(OpaqueIdentifier::Thread(std::thread::current().id()))
|
||||
.unwrap_or_else(|| self.register_thread());
|
||||
|
||||
let parent_id = attrs
|
||||
.parent()
|
||||
.cloned()
|
||||
.or_else(|| tracing::Span::current().id())
|
||||
.map(|id| SpanId::from(&id));
|
||||
|
||||
self.send(Entry::NewSpan(NewSpan { id: id.into(), call_id, parent_id, thread_id }));
|
||||
}
|
||||
|
||||
fn on_enter(&self, id: &TracingId, _ctx: Context<'_, S>) {
|
||||
self.send(Entry::SpanEnter(SpanEnter {
|
||||
id: id.into(),
|
||||
time: self.elapsed(),
|
||||
memory: self.memory_stats(),
|
||||
}))
|
||||
}
|
||||
|
||||
fn on_exit(&self, id: &TracingId, _ctx: Context<'_, S>) {
|
||||
self.send(Entry::SpanExit(SpanExit {
|
||||
id: id.into(),
|
||||
time: self.elapsed(),
|
||||
memory: self.memory_stats(),
|
||||
}))
|
||||
}
|
||||
|
||||
fn on_event(&self, event: &tracing::Event<'_>, _ctx: Context<'_, S>) {
|
||||
let call_id = self
|
||||
.resource_id(OpaqueIdentifier::Call(event.metadata().callsite()))
|
||||
.unwrap_or_else(|| self.register_callsite(event.metadata()));
|
||||
|
||||
let thread_id = self
|
||||
.resource_id(OpaqueIdentifier::Thread(std::thread::current().id()))
|
||||
.unwrap_or_else(|| self.register_thread());
|
||||
|
||||
let parent_id = event
|
||||
.parent()
|
||||
.cloned()
|
||||
.or_else(|| tracing::Span::current().id())
|
||||
.map(|id| SpanId::from(&id));
|
||||
|
||||
self.send(Entry::Event(Event {
|
||||
call_id,
|
||||
thread_id,
|
||||
parent_id,
|
||||
time: self.elapsed(),
|
||||
memory: self.memory_stats(),
|
||||
}))
|
||||
}
|
||||
|
||||
fn on_close(&self, id: TracingId, _ctx: Context<'_, S>) {
|
||||
self.send(Entry::SpanClose(SpanClose { id: Into::into(&id), time: self.elapsed() }))
|
||||
}
|
||||
}
|
54
tracing-trace/src/lib.rs
Normal file
54
tracing-trace/src/lib.rs
Normal file
@ -0,0 +1,54 @@
|
||||
use std::io::{Read, Write};
|
||||
|
||||
use entry::Entry;
|
||||
|
||||
pub mod entry;
|
||||
mod error;
|
||||
pub mod layer;
|
||||
pub mod processor;
|
||||
|
||||
pub use error::Error;
|
||||
|
||||
pub struct TraceWriter<W: Write> {
|
||||
writer: W,
|
||||
receiver: tokio::sync::mpsc::UnboundedReceiver<Entry>,
|
||||
}
|
||||
|
||||
pub struct Trace {
|
||||
receiver: tokio::sync::mpsc::UnboundedReceiver<Entry>,
|
||||
}
|
||||
|
||||
impl Trace {
|
||||
pub fn into_receiver(self) -> tokio::sync::mpsc::UnboundedReceiver<Entry> {
|
||||
self.receiver
|
||||
}
|
||||
|
||||
pub fn into_writer<W: Write>(self, writer: W) -> TraceWriter<W> {
|
||||
TraceWriter { writer, receiver: self.receiver }
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TraceReader<R: Read> {
|
||||
reader: R,
|
||||
}
|
||||
|
||||
impl<R: Read> TraceReader<R> {
|
||||
pub fn new(reader: R) -> Self {
|
||||
Self { reader }
|
||||
}
|
||||
|
||||
fn read(&mut self) -> Option<Result<Entry, Error>> {
|
||||
serde_json::Deserializer::from_reader(&mut self.reader)
|
||||
.into_iter()
|
||||
.next()
|
||||
.map(|res| res.map_err(Into::into))
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read> Iterator for TraceReader<R> {
|
||||
type Item = Result<Entry, Error>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.read()
|
||||
}
|
||||
}
|
122
tracing-trace/src/main.rs
Normal file
122
tracing-trace/src/main.rs
Normal file
@ -0,0 +1,122 @@
|
||||
use tracing::{instrument, Span};
|
||||
use tracing_error::{ErrorLayer, InstrumentResult, SpanTrace, TracedError};
|
||||
|
||||
#[instrument(level = "trace", target = "profile::indexing")]
|
||||
fn foo() -> Result<(), TracedError<Error>> {
|
||||
let _ = bar(40, 2);
|
||||
bar(40, 2)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
XTooBig,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str("x too big")
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for Error {}
|
||||
|
||||
#[instrument(level = "trace", target = "profile::indexing")]
|
||||
fn bar(x: u32, y: u32) -> Result<(), TracedError<Error>> {
|
||||
let handle_ok = spawn_in_current_scope(move || baz(y));
|
||||
let handle = spawn_in_current_scope(move || baz(x + y));
|
||||
handle_ok.join().unwrap().and(handle.join().unwrap())
|
||||
}
|
||||
|
||||
pub fn spawn_in_current_scope<F, T>(f: F) -> std::thread::JoinHandle<T>
|
||||
where
|
||||
F: FnOnce() -> T + Send + 'static,
|
||||
T: Send + 'static,
|
||||
{
|
||||
let current = Span::current();
|
||||
std::thread::spawn(move || {
|
||||
let span = tracing::trace_span!(parent: ¤t, "thread_spawn", id = ?std::thread::current().id(), name = tracing::field::Empty);
|
||||
if let Some(name) = std::thread::current().name() {
|
||||
span.record("name", name);
|
||||
}
|
||||
span.in_scope(f)
|
||||
})
|
||||
}
|
||||
|
||||
#[instrument(level = "trace", target = "profile::indexing")]
|
||||
fn baz(x: u32) -> Result<(), TracedError<Error>> {
|
||||
if x > 10 {
|
||||
fibo_recursive(10);
|
||||
return Err(Error::XTooBig).in_current_span();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[instrument(level = "trace", target = "profile::indexing")]
|
||||
fn fibo_recursive(n: u32) -> u32 {
|
||||
if n == 0 {
|
||||
return 1;
|
||||
}
|
||||
if n == 1 {
|
||||
return 2;
|
||||
}
|
||||
return fibo_recursive(n - 1) - fibo_recursive(n - 2);
|
||||
}
|
||||
|
||||
use tracing_error::ExtractSpanTrace as _;
|
||||
use tracing_subscriber::layer::SubscriberExt as _;
|
||||
use tracing_trace::processor;
|
||||
|
||||
fn on_panic(info: &std::panic::PanicInfo) {
|
||||
let info = info.to_string();
|
||||
let trace = SpanTrace::capture();
|
||||
tracing::error!(%info, %trace);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let (mut trace, profiling_layer) =
|
||||
tracing_trace::TraceWriter::new(std::fs::File::create("trace.json").unwrap(), true);
|
||||
|
||||
let subscriber = tracing_subscriber::registry()
|
||||
// any number of other subscriber layers may be added before or
|
||||
// after the `ErrorLayer`...
|
||||
.with(ErrorLayer::default())
|
||||
.with(profiling_layer);
|
||||
|
||||
// set the subscriber as the default for the application
|
||||
tracing::subscriber::set_global_default(subscriber).unwrap();
|
||||
|
||||
std::panic::set_hook(Box::new(on_panic));
|
||||
|
||||
let res = foo();
|
||||
|
||||
if let Err(error) = res {
|
||||
print_extracted_spantraces(&error)
|
||||
}
|
||||
|
||||
while trace.try_receive().unwrap().is_continue() {}
|
||||
|
||||
trace.flush().unwrap();
|
||||
|
||||
let trace = tracing_trace::TraceReader::new(std::fs::File::open("trace.json").unwrap());
|
||||
|
||||
let profile = processor::firefox_profiler::to_firefox_profile(trace, "test").unwrap();
|
||||
serde_json::to_writer(std::fs::File::create("processed.json").unwrap(), &profile).unwrap();
|
||||
}
|
||||
|
||||
fn print_extracted_spantraces(error: &(dyn std::error::Error + 'static)) {
|
||||
let mut error = Some(error);
|
||||
let mut ind = 0;
|
||||
|
||||
eprintln!("Error:");
|
||||
|
||||
while let Some(err) = error {
|
||||
if let Some(spantrace) = err.span_trace() {
|
||||
eprintln!("found a spantrace:\n{}", color_spantrace::colorize(spantrace));
|
||||
} else {
|
||||
eprintln!("{:>4}: {}", ind, err);
|
||||
}
|
||||
|
||||
error = err.source();
|
||||
ind += 1;
|
||||
}
|
||||
}
|
450
tracing-trace/src/processor/firefox_profiler.rs
Normal file
450
tracing-trace/src/processor/firefox_profiler.rs
Normal file
@ -0,0 +1,450 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use fxprof_processed_profile::{
|
||||
CategoryPairHandle, CounterHandle, CpuDelta, Frame, FrameFlags, FrameInfo, MarkerDynamicField,
|
||||
MarkerFieldFormat, MarkerLocation, MarkerSchema, MarkerSchemaField, ProcessHandle, Profile,
|
||||
ProfilerMarker, ReferenceTimestamp, SamplingInterval, StringHandle, Timestamp,
|
||||
};
|
||||
use serde_json::json;
|
||||
|
||||
use crate::entry::{
|
||||
Entry, Event, MemoryStats, NewCallsite, NewSpan, ResourceId, SpanClose, SpanEnter, SpanExit,
|
||||
SpanId,
|
||||
};
|
||||
use crate::{Error, TraceReader};
|
||||
|
||||
pub fn to_firefox_profile<R: std::io::Read>(
|
||||
trace: TraceReader<R>,
|
||||
app: &str,
|
||||
) -> Result<Profile, Error> {
|
||||
let mut profile = Profile::new(
|
||||
app,
|
||||
ReferenceTimestamp::from_millis_since_unix_epoch(0.0),
|
||||
SamplingInterval::from_nanos(15),
|
||||
);
|
||||
|
||||
let mut last_timestamp = Timestamp::from_nanos_since_reference(0);
|
||||
let main = profile.add_process(app, 0, last_timestamp);
|
||||
|
||||
let mut calls = HashMap::new();
|
||||
let mut threads = HashMap::new();
|
||||
let mut spans = HashMap::new();
|
||||
|
||||
let category = profile.add_category("general", fxprof_processed_profile::CategoryColor::Blue);
|
||||
let subcategory = profile.add_subcategory(category, "subcategory");
|
||||
|
||||
let mut last_memory = MemoryStats::default();
|
||||
|
||||
let mut memory_counters = None;
|
||||
|
||||
for entry in trace {
|
||||
let entry = entry?;
|
||||
match entry {
|
||||
Entry::NewCallsite(callsite) => {
|
||||
let string_handle = profile.intern_string(callsite.name.as_ref());
|
||||
calls.insert(callsite.call_id, (callsite, string_handle));
|
||||
}
|
||||
Entry::NewThread(thread) => {
|
||||
let thread_handle = profile.add_thread(
|
||||
main,
|
||||
thread.thread_id.to_usize() as u32,
|
||||
last_timestamp,
|
||||
threads.is_empty(),
|
||||
);
|
||||
if let Some(name) = &thread.name {
|
||||
profile.set_thread_name(thread_handle, name)
|
||||
}
|
||||
threads.insert(thread.thread_id, thread_handle);
|
||||
}
|
||||
Entry::NewSpan(span) => {
|
||||
spans.insert(span.id, (span, SpanStatus::Outside));
|
||||
}
|
||||
Entry::SpanEnter(SpanEnter { id, time, memory }) => {
|
||||
let (_span, status) = spans.get_mut(&id).unwrap();
|
||||
|
||||
let SpanStatus::Outside = status else {
|
||||
continue;
|
||||
};
|
||||
|
||||
*status = SpanStatus::Inside { time, memory };
|
||||
|
||||
last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64);
|
||||
|
||||
add_memory_samples(
|
||||
&mut profile,
|
||||
main,
|
||||
memory,
|
||||
last_timestamp,
|
||||
&mut memory_counters,
|
||||
&mut last_memory,
|
||||
);
|
||||
}
|
||||
Entry::SpanExit(SpanExit { id, time, memory }) => {
|
||||
let (span, status) = spans.get_mut(&id).unwrap();
|
||||
|
||||
let SpanStatus::Inside { time: begin, memory: begin_memory } = status else {
|
||||
continue;
|
||||
};
|
||||
last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64);
|
||||
|
||||
let begin = *begin;
|
||||
let begin_memory = *begin_memory;
|
||||
|
||||
*status = SpanStatus::Outside;
|
||||
|
||||
let span = *span;
|
||||
let thread_handle = threads.get(&span.thread_id).unwrap();
|
||||
|
||||
let frames = make_frames(span, &spans, &calls, subcategory);
|
||||
|
||||
profile.add_sample(
|
||||
*thread_handle,
|
||||
to_timestamp(begin),
|
||||
frames.iter().rev().cloned(),
|
||||
CpuDelta::ZERO,
|
||||
1,
|
||||
);
|
||||
profile.add_sample(
|
||||
*thread_handle,
|
||||
to_timestamp(time),
|
||||
frames.iter().rev().cloned(),
|
||||
CpuDelta::from_nanos((time - begin).as_nanos() as u64),
|
||||
1,
|
||||
);
|
||||
|
||||
add_memory_samples(
|
||||
&mut profile,
|
||||
main,
|
||||
memory,
|
||||
last_timestamp,
|
||||
&mut memory_counters,
|
||||
&mut last_memory,
|
||||
);
|
||||
|
||||
let (callsite, _) = calls.get(&span.call_id).unwrap();
|
||||
|
||||
let memory_delta =
|
||||
begin_memory.zip(memory).and_then(|(begin, end)| end.checked_sub(begin));
|
||||
let marker = SpanMarker { callsite, span: &span, memory_delta };
|
||||
|
||||
profile.add_marker_with_stack(
|
||||
*thread_handle,
|
||||
&callsite.name,
|
||||
marker,
|
||||
fxprof_processed_profile::MarkerTiming::Interval(
|
||||
to_timestamp(begin),
|
||||
to_timestamp(time),
|
||||
),
|
||||
frames.iter().rev().cloned(),
|
||||
)
|
||||
}
|
||||
Entry::Event(event) => {
|
||||
let span = event
|
||||
.parent_id
|
||||
.as_ref()
|
||||
.and_then(|parent_id| spans.get(parent_id))
|
||||
.and_then(|(span, status)| match status {
|
||||
SpanStatus::Outside => None,
|
||||
SpanStatus::Inside { .. } => Some(span),
|
||||
})
|
||||
.copied();
|
||||
let timestamp = to_timestamp(event.time);
|
||||
|
||||
let thread_handle = threads.get(&event.thread_id).unwrap();
|
||||
|
||||
let frames = span
|
||||
.map(|span| make_frames(span, &spans, &calls, subcategory))
|
||||
.unwrap_or_default();
|
||||
|
||||
profile.add_sample(
|
||||
*thread_handle,
|
||||
timestamp,
|
||||
frames.iter().rev().cloned(),
|
||||
CpuDelta::ZERO,
|
||||
1,
|
||||
);
|
||||
|
||||
let memory_delta = add_memory_samples(
|
||||
&mut profile,
|
||||
main,
|
||||
event.memory,
|
||||
last_timestamp,
|
||||
&mut memory_counters,
|
||||
&mut last_memory,
|
||||
);
|
||||
|
||||
let (callsite, _) = calls.get(&event.call_id).unwrap();
|
||||
|
||||
let marker = EventMarker { callsite, event: &event, memory_delta };
|
||||
|
||||
profile.add_marker_with_stack(
|
||||
*thread_handle,
|
||||
&callsite.name,
|
||||
marker,
|
||||
fxprof_processed_profile::MarkerTiming::Instant(timestamp),
|
||||
frames.iter().rev().cloned(),
|
||||
);
|
||||
|
||||
last_timestamp = timestamp;
|
||||
}
|
||||
Entry::SpanClose(SpanClose { id, time }) => {
|
||||
spans.remove(&id);
|
||||
last_timestamp = to_timestamp(time);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(profile)
|
||||
}
|
||||
|
||||
struct MemoryCounterHandles {
|
||||
usage: CounterHandle,
|
||||
}
|
||||
|
||||
impl MemoryCounterHandles {
|
||||
fn new(profile: &mut Profile, main: ProcessHandle) -> Self {
|
||||
let usage =
|
||||
profile.add_counter(main, "mimmalloc", "Memory", "Amount of memory currently in use");
|
||||
Self { usage }
|
||||
}
|
||||
}
|
||||
|
||||
fn add_memory_samples(
|
||||
profile: &mut Profile,
|
||||
main: ProcessHandle,
|
||||
memory: Option<MemoryStats>,
|
||||
last_timestamp: Timestamp,
|
||||
memory_counters: &mut Option<MemoryCounterHandles>,
|
||||
last_memory: &mut MemoryStats,
|
||||
) -> Option<MemoryStats> {
|
||||
let Some(stats) = memory else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let memory_counters =
|
||||
memory_counters.get_or_insert_with(|| MemoryCounterHandles::new(profile, main));
|
||||
|
||||
profile.add_counter_sample(
|
||||
memory_counters.usage,
|
||||
last_timestamp,
|
||||
stats.resident as f64 - last_memory.resident as f64,
|
||||
0,
|
||||
);
|
||||
|
||||
let delta = stats.checked_sub(*last_memory);
|
||||
*last_memory = stats;
|
||||
delta
|
||||
}
|
||||
|
||||
fn to_timestamp(time: std::time::Duration) -> Timestamp {
|
||||
Timestamp::from_nanos_since_reference(time.as_nanos() as u64)
|
||||
}
|
||||
|
||||
fn make_frames(
|
||||
span: NewSpan,
|
||||
spans: &HashMap<SpanId, (NewSpan, SpanStatus)>,
|
||||
calls: &HashMap<ResourceId, (NewCallsite, StringHandle)>,
|
||||
subcategory: CategoryPairHandle,
|
||||
) -> Vec<FrameInfo> {
|
||||
let mut frames = Vec::new();
|
||||
let mut current_span = span;
|
||||
loop {
|
||||
let frame = make_frame(current_span, calls, subcategory);
|
||||
frames.push(frame);
|
||||
if let Some(parent) = current_span.parent_id {
|
||||
current_span = spans.get(&parent).unwrap().0;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
frames
|
||||
}
|
||||
|
||||
fn make_frame(
|
||||
span: NewSpan,
|
||||
calls: &HashMap<ResourceId, (NewCallsite, StringHandle)>,
|
||||
subcategory: CategoryPairHandle,
|
||||
) -> FrameInfo {
|
||||
let (_, call) = calls.get(&span.call_id).unwrap();
|
||||
FrameInfo { frame: Frame::Label(*call), category_pair: subcategory, flags: FrameFlags::empty() }
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
enum SpanStatus {
|
||||
Outside,
|
||||
Inside { time: std::time::Duration, memory: Option<MemoryStats> },
|
||||
}
|
||||
|
||||
struct SpanMarker<'a> {
|
||||
span: &'a NewSpan,
|
||||
callsite: &'a NewCallsite,
|
||||
memory_delta: Option<MemoryStats>,
|
||||
}
|
||||
|
||||
impl<'a> ProfilerMarker for SpanMarker<'a> {
|
||||
const MARKER_TYPE_NAME: &'static str = "span";
|
||||
|
||||
fn schema() -> MarkerSchema {
|
||||
let fields = vec![
|
||||
MarkerSchemaField::Dynamic(MarkerDynamicField {
|
||||
key: "filename",
|
||||
label: "File name",
|
||||
format: MarkerFieldFormat::FilePath,
|
||||
searchable: true,
|
||||
}),
|
||||
MarkerSchemaField::Dynamic(MarkerDynamicField {
|
||||
key: "line",
|
||||
label: "Line",
|
||||
format: MarkerFieldFormat::Integer,
|
||||
searchable: true,
|
||||
}),
|
||||
MarkerSchemaField::Dynamic(MarkerDynamicField {
|
||||
key: "module_path",
|
||||
label: "Module path",
|
||||
format: MarkerFieldFormat::String,
|
||||
searchable: true,
|
||||
}),
|
||||
MarkerSchemaField::Dynamic(MarkerDynamicField {
|
||||
key: "span_id",
|
||||
label: "Span ID",
|
||||
format: MarkerFieldFormat::Integer,
|
||||
searchable: true,
|
||||
}),
|
||||
MarkerSchemaField::Dynamic(MarkerDynamicField {
|
||||
key: "thread_id",
|
||||
label: "Thread ID",
|
||||
format: MarkerFieldFormat::Integer,
|
||||
searchable: true,
|
||||
}),
|
||||
MarkerSchemaField::Dynamic(MarkerDynamicField {
|
||||
key: "resident",
|
||||
label: "Resident set size, measured in bytes while this function was executing",
|
||||
format: MarkerFieldFormat::Bytes,
|
||||
searchable: false,
|
||||
}),
|
||||
];
|
||||
|
||||
MarkerSchema {
|
||||
type_name: Self::MARKER_TYPE_NAME,
|
||||
locations: vec![
|
||||
MarkerLocation::MarkerTable,
|
||||
MarkerLocation::MarkerChart,
|
||||
MarkerLocation::TimelineOverview,
|
||||
],
|
||||
chart_label: None,
|
||||
tooltip_label: Some("{marker.name} - {marker.data.filename}:{marker.data.line}"),
|
||||
table_label: Some("{marker.data.filename}:{marker.data.line}"),
|
||||
fields,
|
||||
}
|
||||
}
|
||||
|
||||
fn json_marker_data(&self) -> serde_json::Value {
|
||||
let filename = self.callsite.file.as_deref();
|
||||
let line = self.callsite.line;
|
||||
let module_path = self.callsite.module_path.as_deref();
|
||||
let span_id = self.span.id;
|
||||
let thread_id = self.span.thread_id;
|
||||
|
||||
let mut value = json!({
|
||||
"type": Self::MARKER_TYPE_NAME,
|
||||
"filename": filename,
|
||||
"line": line,
|
||||
"module_path": module_path,
|
||||
"span_id": span_id,
|
||||
"thread_id": thread_id,
|
||||
});
|
||||
|
||||
if let Some(MemoryStats { resident }) = self.memory_delta {
|
||||
value["resident"] = json!(resident);
|
||||
}
|
||||
|
||||
value
|
||||
}
|
||||
}
|
||||
|
||||
struct EventMarker<'a> {
|
||||
event: &'a Event,
|
||||
callsite: &'a NewCallsite,
|
||||
memory_delta: Option<MemoryStats>,
|
||||
}
|
||||
|
||||
impl<'a> ProfilerMarker for EventMarker<'a> {
|
||||
const MARKER_TYPE_NAME: &'static str = "tracing-event";
|
||||
|
||||
fn schema() -> MarkerSchema {
|
||||
let fields = vec![
|
||||
MarkerSchemaField::Dynamic(MarkerDynamicField {
|
||||
key: "filename",
|
||||
label: "File name",
|
||||
format: MarkerFieldFormat::FilePath,
|
||||
searchable: true,
|
||||
}),
|
||||
MarkerSchemaField::Dynamic(MarkerDynamicField {
|
||||
key: "line",
|
||||
label: "Line",
|
||||
format: MarkerFieldFormat::Integer,
|
||||
searchable: true,
|
||||
}),
|
||||
MarkerSchemaField::Dynamic(MarkerDynamicField {
|
||||
key: "module_path",
|
||||
label: "Module path",
|
||||
format: MarkerFieldFormat::String,
|
||||
searchable: true,
|
||||
}),
|
||||
MarkerSchemaField::Dynamic(MarkerDynamicField {
|
||||
key: "parent_span_id",
|
||||
label: "Parent Span ID",
|
||||
format: MarkerFieldFormat::Integer,
|
||||
searchable: true,
|
||||
}),
|
||||
MarkerSchemaField::Dynamic(MarkerDynamicField {
|
||||
key: "thread_id",
|
||||
label: "Thread ID",
|
||||
format: MarkerFieldFormat::Integer,
|
||||
searchable: true,
|
||||
}),
|
||||
MarkerSchemaField::Dynamic(MarkerDynamicField {
|
||||
key: "resident",
|
||||
label: "Resident set size, measured in bytes while this function was executing",
|
||||
format: MarkerFieldFormat::Bytes,
|
||||
searchable: false,
|
||||
}),
|
||||
];
|
||||
|
||||
MarkerSchema {
|
||||
type_name: Self::MARKER_TYPE_NAME,
|
||||
locations: vec![
|
||||
MarkerLocation::MarkerTable,
|
||||
MarkerLocation::MarkerChart,
|
||||
MarkerLocation::TimelineOverview,
|
||||
],
|
||||
chart_label: None,
|
||||
tooltip_label: Some("{marker.name} - {marker.data.filename}:{marker.data.line}"),
|
||||
table_label: Some("{marker.data.filename}:{marker.data.line}"),
|
||||
fields,
|
||||
}
|
||||
}
|
||||
|
||||
fn json_marker_data(&self) -> serde_json::Value {
|
||||
let filename = self.callsite.file.as_deref();
|
||||
let line = self.callsite.line;
|
||||
let module_path = self.callsite.module_path.as_deref();
|
||||
let span_id = self.event.parent_id;
|
||||
let thread_id = self.event.thread_id;
|
||||
|
||||
let mut value = json!({
|
||||
"type": Self::MARKER_TYPE_NAME,
|
||||
"filename": filename,
|
||||
"line": line,
|
||||
"module_path": module_path,
|
||||
"parent_span_id": span_id,
|
||||
"thread_id": thread_id,
|
||||
});
|
||||
|
||||
if let Some(MemoryStats { resident }) = self.memory_delta {
|
||||
value["resident"] = json!(resident);
|
||||
}
|
||||
|
||||
value
|
||||
}
|
||||
}
|
195
tracing-trace/src/processor/fmt.rs
Normal file
195
tracing-trace/src/processor/fmt.rs
Normal file
@ -0,0 +1,195 @@
|
||||
use std::collections::HashMap;
|
||||
use std::io::Read;
|
||||
|
||||
use crate::entry::{
|
||||
Entry, Event, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter,
|
||||
SpanExit, SpanId,
|
||||
};
|
||||
use crate::{Error, TraceReader};
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
enum SpanStatus {
|
||||
Outside,
|
||||
Inside(std::time::Duration),
|
||||
}
|
||||
|
||||
pub fn print_trace<R: Read>(trace: TraceReader<R>) -> Result<(), Error> {
|
||||
let mut calls = HashMap::new();
|
||||
let mut threads = HashMap::new();
|
||||
let mut spans = HashMap::new();
|
||||
for entry in trace {
|
||||
let entry = entry?;
|
||||
match entry {
|
||||
Entry::NewCallsite(callsite) => {
|
||||
calls.insert(callsite.call_id, callsite);
|
||||
}
|
||||
Entry::NewThread(NewThread { thread_id, name }) => {
|
||||
threads.insert(thread_id, name);
|
||||
}
|
||||
Entry::NewSpan(span) => {
|
||||
spans.insert(span.id, (span, SpanStatus::Outside));
|
||||
}
|
||||
Entry::SpanEnter(SpanEnter { id, time, memory }) => {
|
||||
let (span, status) = spans.get_mut(&id).unwrap();
|
||||
|
||||
let SpanStatus::Outside = status else {
|
||||
continue;
|
||||
};
|
||||
|
||||
*status = SpanStatus::Inside(time);
|
||||
|
||||
let span = *span;
|
||||
|
||||
match memory {
|
||||
Some(stats) => println!(
|
||||
"[{}]{}::{} ({}) <-",
|
||||
print_thread(&threads, span.thread_id),
|
||||
print_backtrace(&spans, &calls, &span),
|
||||
print_span(&calls, &span),
|
||||
print_memory(stats),
|
||||
),
|
||||
None => println!(
|
||||
"[{}]{}::{} <-",
|
||||
print_thread(&threads, span.thread_id),
|
||||
print_backtrace(&spans, &calls, &span),
|
||||
print_span(&calls, &span),
|
||||
),
|
||||
}
|
||||
}
|
||||
Entry::SpanExit(SpanExit { id, time, memory }) => {
|
||||
let (span, status) = spans.get_mut(&id).unwrap();
|
||||
|
||||
let SpanStatus::Inside(begin) = status else {
|
||||
continue;
|
||||
};
|
||||
let begin = *begin;
|
||||
|
||||
*status = SpanStatus::Outside;
|
||||
|
||||
let span = *span;
|
||||
|
||||
match memory {
|
||||
Some(stats) => println!(
|
||||
"[{}]{}::{} ({}) -> {}",
|
||||
print_thread(&threads, span.thread_id),
|
||||
print_backtrace(&spans, &calls, &span),
|
||||
print_span(&calls, &span),
|
||||
print_memory(stats),
|
||||
print_duration(time - begin),
|
||||
),
|
||||
None => println!(
|
||||
"[{}]{}::{} -> {}",
|
||||
print_thread(&threads, span.thread_id),
|
||||
print_backtrace(&spans, &calls, &span),
|
||||
print_span(&calls, &span),
|
||||
print_duration(time - begin),
|
||||
),
|
||||
}
|
||||
}
|
||||
Entry::SpanClose(SpanClose { id, time: _ }) => {
|
||||
spans.remove(&id);
|
||||
}
|
||||
Entry::Event(Event { call_id, thread_id, parent_id, time: _, memory }) => {
|
||||
let parent_span = parent_id.and_then(|parent_id| spans.get(&parent_id)).and_then(
|
||||
|(span, status)| match status {
|
||||
SpanStatus::Outside => None,
|
||||
SpanStatus::Inside(_) => Some(span),
|
||||
},
|
||||
);
|
||||
match (parent_span, memory) {
|
||||
(Some(parent_span), Some(stats)) => println!(
|
||||
"[{}]{}::{} ({}) event: {}",
|
||||
print_thread(&threads, thread_id),
|
||||
print_backtrace(&spans, &calls, parent_span),
|
||||
print_span(&calls, parent_span),
|
||||
print_memory(stats),
|
||||
print_call(&calls, call_id)
|
||||
),
|
||||
(Some(parent_span), None) => println!(
|
||||
"[{}]{}::{} event: {}",
|
||||
print_thread(&threads, thread_id),
|
||||
print_backtrace(&spans, &calls, parent_span),
|
||||
print_span(&calls, parent_span),
|
||||
print_call(&calls, call_id)
|
||||
),
|
||||
(None, None) => println!(
|
||||
"[{}] event: {}",
|
||||
print_thread(&threads, thread_id),
|
||||
print_call(&calls, call_id)
|
||||
),
|
||||
(None, Some(stats)) => println!(
|
||||
"[{}] ({}) event: {}",
|
||||
print_thread(&threads, thread_id),
|
||||
print_memory(stats),
|
||||
print_call(&calls, call_id)
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn print_thread(threads: &HashMap<ResourceId, Option<String>>, thread_id: ResourceId) -> String {
|
||||
let thread = threads.get(&thread_id).unwrap();
|
||||
let thread =
|
||||
thread.as_ref().cloned().unwrap_or_else(|| format!("ThreadId({})", thread_id.to_usize()));
|
||||
thread
|
||||
}
|
||||
|
||||
fn print_backtrace(
|
||||
spans: &HashMap<SpanId, (NewSpan, SpanStatus)>,
|
||||
calls: &HashMap<ResourceId, NewCallsite>,
|
||||
span: &NewSpan,
|
||||
) -> String {
|
||||
let mut parents = Vec::new();
|
||||
let mut current = span.parent_id;
|
||||
while let Some(current_id) = ¤t {
|
||||
let (span, _) = spans.get(current_id).unwrap();
|
||||
let callsite = calls.get(&span.call_id).unwrap();
|
||||
parents.push(callsite.name.clone());
|
||||
|
||||
current = span.parent_id;
|
||||
}
|
||||
|
||||
let x: Vec<String> = parents.into_iter().rev().map(|x| x.to_string()).collect();
|
||||
x.join("::")
|
||||
}
|
||||
|
||||
fn print_span(calls: &HashMap<ResourceId, NewCallsite>, span: &NewSpan) -> String {
|
||||
print_call(calls, span.call_id)
|
||||
}
|
||||
|
||||
fn print_call(calls: &HashMap<ResourceId, NewCallsite>, call_id: ResourceId) -> String {
|
||||
let callsite = calls.get(&call_id).unwrap();
|
||||
match (callsite.file.clone(), callsite.line) {
|
||||
(Some(file), None) => format!("{} ({})", callsite.name, file),
|
||||
(Some(file), Some(line)) => format!("{} ({}:{})", callsite.name, file, line),
|
||||
_ => callsite.name.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn print_duration(duration: std::time::Duration) -> String {
|
||||
if duration.as_nanos() < 1000 {
|
||||
format!("{}ns", duration.as_nanos())
|
||||
} else if duration.as_micros() < 1000 {
|
||||
format!("{}μs", duration.as_micros())
|
||||
} else if duration.as_millis() < 1000 {
|
||||
format!("{}ms", duration.as_millis())
|
||||
} else if duration.as_secs() < 120 {
|
||||
format!("{}s", duration.as_secs())
|
||||
} else if duration.as_secs_f64() / 60.0 < 60.0 {
|
||||
format!("{}min", duration.as_secs_f64() / 60.0)
|
||||
} else if duration.as_secs_f64() / 3600.0 < 8.0 {
|
||||
format!("{}h", duration.as_secs_f64() / 3600.0)
|
||||
} else {
|
||||
format!("{}d", duration.as_secs_f64() / 3600.0 / 24.0)
|
||||
}
|
||||
}
|
||||
|
||||
/// Format only the allocated bytes, deallocated bytes and reallocated bytes in GiB, MiB, KiB, Bytes.
|
||||
fn print_memory(MemoryStats { resident }: MemoryStats) -> String {
|
||||
use byte_unit::Byte;
|
||||
let rss_bytes = Byte::from_bytes(resident).get_appropriate_unit(true);
|
||||
format!("RSS {rss_bytes:.2}")
|
||||
}
|
3
tracing-trace/src/processor/mod.rs
Normal file
3
tracing-trace/src/processor/mod.rs
Normal file
@ -0,0 +1,3 @@
|
||||
pub mod firefox_profiler;
|
||||
pub mod fmt;
|
||||
pub mod span_stats;
|
79
tracing-trace/src/processor/span_stats.rs
Normal file
79
tracing-trace/src/processor/span_stats.rs
Normal file
@ -0,0 +1,79 @@
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::time::Duration;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::entry::{Entry, NewCallsite, SpanClose, SpanEnter, SpanExit};
|
||||
use crate::{Error, TraceReader};
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
enum SpanStatus {
|
||||
Outside,
|
||||
Inside(std::time::Duration),
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct CallStats {
|
||||
pub call_count: usize,
|
||||
pub time: u64,
|
||||
}
|
||||
|
||||
pub fn to_call_stats<R: std::io::Read>(
|
||||
trace: TraceReader<R>,
|
||||
) -> Result<BTreeMap<String, CallStats>, Error> {
|
||||
let mut calls = HashMap::new();
|
||||
let mut spans = HashMap::new();
|
||||
for entry in trace {
|
||||
let entry = entry?;
|
||||
match entry {
|
||||
Entry::NewCallsite(callsite) => {
|
||||
calls.insert(callsite.call_id, (callsite, vec![]));
|
||||
}
|
||||
Entry::NewThread(_) => {}
|
||||
Entry::NewSpan(span) => {
|
||||
spans.insert(span.id, (span, SpanStatus::Outside));
|
||||
}
|
||||
Entry::SpanEnter(SpanEnter { id, time, memory: _ }) => {
|
||||
let (_, status) = spans.get_mut(&id).unwrap();
|
||||
|
||||
let SpanStatus::Outside = status else {
|
||||
continue;
|
||||
};
|
||||
|
||||
*status = SpanStatus::Inside(time);
|
||||
}
|
||||
Entry::SpanExit(SpanExit { id, time: end, memory: _ }) => {
|
||||
let (span, status) = spans.get_mut(&id).unwrap();
|
||||
|
||||
let SpanStatus::Inside(begin) = status else {
|
||||
continue;
|
||||
};
|
||||
let begin = *begin;
|
||||
|
||||
*status = SpanStatus::Outside;
|
||||
|
||||
let span = *span;
|
||||
let (_, call_list) = calls.get_mut(&span.call_id).unwrap();
|
||||
call_list.push(end - begin);
|
||||
}
|
||||
Entry::SpanClose(SpanClose { id, time: _ }) => {
|
||||
spans.remove(&id);
|
||||
}
|
||||
Entry::Event(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(calls
|
||||
.into_iter()
|
||||
.map(|(_, (call_site, calls))| (site_to_string(call_site), calls_to_stats(calls)))
|
||||
.collect())
|
||||
}
|
||||
|
||||
fn site_to_string(call_site: NewCallsite) -> String {
|
||||
format!("{}::{}", call_site.target, call_site.name)
|
||||
}
|
||||
fn calls_to_stats(calls: Vec<Duration>) -> CallStats {
|
||||
let nb = calls.len();
|
||||
let sum: Duration = calls.iter().sum();
|
||||
CallStats { call_count: nb, time: sum.as_nanos() as u64 }
|
||||
}
|
Loading…
Reference in New Issue
Block a user