Merge branch 'meilisearch:main' into enhancement-4379-script-correction-when-musl-present

This commit is contained in:
Mathias Vandaele 2024-02-26 19:17:32 +01:00 committed by GitHub
commit c65d3d0314
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
103 changed files with 4263 additions and 868 deletions

View File

@ -110,6 +110,25 @@ jobs:
--milestone $MILESTONE_VERSION \
--assignee curquiza
create-update-version-issue:
needs: get-release-version
# Create the changelog issue if the release is not only a patch release
if: github.event.action == 'created'
runs-on: ubuntu-latest
env:
ISSUE_TEMPLATE: issue-template.md
steps:
- uses: actions/checkout@v3
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-version-issue.md > $ISSUE_TEMPLATE
- name: Create the issue
run: |
gh issue create \
--title "Update version in Cargo.toml for $MILESTONE_VERSION" \
--label 'maintenance' \
--body-file $ISSUE_TEMPLATE \
--milestone $MILESTONE_VERSION
# ----------------
# MILESTONE CLOSED
# ----------------

490
Cargo.lock generated
View File

@ -36,16 +36,16 @@ dependencies = [
[[package]]
name = "actix-http"
version = "3.5.1"
version = "3.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "129d4c88e98860e1758c5de288d1632b07970a16d59bdf7b8d66053d582bb71f"
checksum = "d223b13fd481fc0d1f83bb12659ae774d9e3601814c68a0bc539731698cca743"
dependencies = [
"actix-codec",
"actix-rt",
"actix-service",
"actix-tls",
"actix-utils",
"ahash 0.8.3",
"ahash 0.8.8",
"base64 0.21.7",
"bitflags 2.4.1",
"brotli",
@ -138,9 +138,9 @@ dependencies = [
[[package]]
name = "actix-tls"
version = "3.1.1"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72616e7fbec0aa99c6f3164677fa48ff5a60036d0799c98cab894a44f3e0efc3"
checksum = "d4cce60a2f2b477bc72e5cde0af1812a6e82d8fd85b5570a5dcf2a5bf2c5be5f"
dependencies = [
"actix-rt",
"actix-service",
@ -148,13 +148,11 @@ dependencies = [
"futures-core",
"impl-more",
"pin-project-lite",
"rustls 0.21.6",
"rustls-webpki",
"tokio",
"tokio-rustls 0.23.4",
"tokio-rustls",
"tokio-util",
"tracing",
"webpki-roots 0.22.6",
"webpki-roots",
]
[[package]]
@ -169,9 +167,9 @@ dependencies = [
[[package]]
name = "actix-web"
version = "4.4.1"
version = "4.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e43428f3bf11dee6d166b00ec2df4e3aa8cc1606aaa0b7433c146852e2f4e03b"
checksum = "43a6556ddebb638c2358714d853257ed226ece6023ef9364f23f0c70737ea984"
dependencies = [
"actix-codec",
"actix-http",
@ -183,7 +181,7 @@ dependencies = [
"actix-tls",
"actix-utils",
"actix-web-codegen",
"ahash 0.8.3",
"ahash 0.8.8",
"bytes",
"bytestring",
"cfg-if",
@ -270,14 +268,15 @@ dependencies = [
[[package]]
name = "ahash"
version = "0.8.3"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff"
dependencies = [
"cfg-if",
"getrandom",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
@ -494,7 +493,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]]
name = "benchmarks"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"anyhow",
"bytes",
@ -535,6 +534,26 @@ dependencies = [
"serde",
]
[[package]]
name = "bindgen"
version = "0.68.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078"
dependencies = [
"bitflags 2.4.1",
"cexpr",
"clang-sys",
"lazy_static",
"lazycell",
"peeking_take_while",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn 2.0.48",
]
[[package]]
name = "bit-set"
version = "0.5.3"
@ -814,9 +833,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
version = "1.0.82"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "305fe645edc1442a0fa8b6726ba61d422798d37a52e12eaecf4b022ebbb88f01"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"jobserver",
"libc",
@ -831,6 +850,15 @@ dependencies = [
"smallvec",
]
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
@ -915,6 +943,17 @@ dependencies = [
"inout",
]
[[package]]
name = "clang-sys"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1"
dependencies = [
"glob",
"libc",
"libloading",
]
[[package]]
name = "clap"
version = "4.4.17"
@ -961,6 +1000,18 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15"
[[package]]
name = "color-spantrace"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd6be1b2a7e382e2b98b43b2adcca6bb0e465af0bdd38123873ae61eb17a72c2"
dependencies = [
"once_cell",
"owo-colors",
"tracing-core",
"tracing-error",
]
[[package]]
name = "colorchoice"
version = "1.0.0"
@ -1286,6 +1337,15 @@ dependencies = [
"syn 2.0.48",
]
[[package]]
name = "debugid"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d"
dependencies = [
"uuid",
]
[[package]]
name = "deduplicating_array"
version = "0.1.5"
@ -1476,13 +1536,12 @@ dependencies = [
[[package]]
name = "dump"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"anyhow",
"big_s",
"flate2",
"http 0.2.11",
"log",
"maplit",
"meili-snap",
"meilisearch-auth",
@ -1496,6 +1555,7 @@ dependencies = [
"tempfile",
"thiserror",
"time",
"tracing",
"uuid",
]
@ -1720,7 +1780,7 @@ dependencies = [
[[package]]
name = "file-store"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"faux",
"tempfile",
@ -1742,7 +1802,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"insta",
"nom",
@ -1773,7 +1833,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"criterion",
"serde_json",
@ -1891,7 +1951,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"arbitrary",
"clap",
@ -1911,6 +1971,19 @@ dependencies = [
"byteorder",
]
[[package]]
name = "fxprof-processed-profile"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27d12c0aed7f1e24276a241aadc4cb8ea9f83000f34bc062b7cc2d51e3b0fabd"
dependencies = [
"bitflags 2.4.1",
"debugid",
"fxhash",
"serde",
"serde_json",
]
[[package]]
name = "gemm"
version = "0.17.0"
@ -2052,8 +2125,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
dependencies = [
"cfg-if",
"js-sys",
"libc",
"wasi",
"wasm-bindgen",
]
[[package]]
@ -2177,7 +2252,7 @@ dependencies = [
"atomic-polyfill",
"hash32",
"rustc_version",
"spin 0.9.8",
"spin",
"stable_deref_trait",
]
@ -2346,9 +2421,9 @@ dependencies = [
"futures-util",
"http 0.2.11",
"hyper",
"rustls 0.21.6",
"rustls",
"tokio",
"tokio-rustls 0.24.1",
"tokio-rustls",
]
[[package]]
@ -2856,7 +2931,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
[[package]]
name = "index-scheduler"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"anyhow",
"big_s",
@ -2869,11 +2944,9 @@ dependencies = [
"file-store",
"flate2",
"insta",
"log",
"meili-snap",
"meilisearch-auth",
"meilisearch-types",
"nelson",
"page_size 0.5.0",
"puffin",
"roaring",
@ -2883,6 +2956,7 @@ dependencies = [
"tempfile",
"thiserror",
"time",
"tracing",
"ureq",
"uuid",
]
@ -3043,7 +3117,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"criterion",
"serde_json",
@ -3051,13 +3125,14 @@ dependencies = [
[[package]]
name = "jsonwebtoken"
version = "8.3.0"
version = "9.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378"
checksum = "5c7ea04a7c5c055c175f189b6dc6ba036fd62306b58c66c9f6389036c503a3f4"
dependencies = [
"base64 0.21.7",
"js-sys",
"pem",
"ring 0.16.20",
"ring",
"serde",
"serde_json",
"simple_asn1",
@ -3085,6 +3160,12 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "levenshtein_automata"
version = "0.2.1"
@ -3112,6 +3193,16 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "libloading"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161"
dependencies = [
"cfg-if",
"windows-sys 0.48.0",
]
[[package]]
name = "libm"
version = "0.2.7"
@ -3128,6 +3219,17 @@ dependencies = [
"libc",
]
[[package]]
name = "libproc"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "229004ebba9d1d5caf41623f1523b6d52abb47d9f6ab87f7e6fc992e3b854aef"
dependencies = [
"bindgen",
"errno",
"libc",
]
[[package]]
name = "libz-sys"
version = "1.1.12"
@ -3555,7 +3657,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"insta",
"md5",
@ -3564,7 +3666,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"actix-cors",
"actix-http",
@ -3586,7 +3688,6 @@ dependencies = [
"deserr",
"dump",
"either",
"env_logger",
"file-store",
"flate2",
"fst",
@ -3601,7 +3702,6 @@ dependencies = [
"itertools 0.11.0",
"jsonwebtoken",
"lazy_static",
"log",
"manifest-dir-macros",
"maplit",
"meili-snap",
@ -3623,7 +3723,7 @@ dependencies = [
"rayon",
"regex",
"reqwest",
"rustls 0.20.9",
"rustls",
"rustls-pemfile",
"segment",
"serde",
@ -3644,6 +3744,10 @@ dependencies = [
"tokio",
"tokio-stream",
"toml",
"tracing",
"tracing-actix-web",
"tracing-subscriber",
"tracing-trace",
"url",
"urlencoding",
"uuid",
@ -3655,7 +3759,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"base64 0.21.7",
"enum-iterator",
@ -3674,7 +3778,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"actix-web",
"anyhow",
@ -3704,7 +3808,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"anyhow",
"clap",
@ -3743,7 +3847,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"arroy",
"big_s",
@ -3776,7 +3880,6 @@ dependencies = [
"json-depth-checker",
"levenshtein_automata",
"liquid",
"log",
"logging_timer",
"maplit",
"md5",
@ -3805,6 +3908,7 @@ dependencies = [
"time",
"tokenizers",
"tokio",
"tracing",
"uuid",
]
@ -3882,9 +3986,10 @@ dependencies = [
]
[[package]]
name = "nelson"
version = "0.1.0"
source = "git+https://github.com/meilisearch/nelson.git?rev=675f13885548fb415ead8fbb447e9e6d9314000a#675f13885548fb415ead8fbb447e9e6d9314000a"
name = "mutually_exclusive_features"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e"
[[package]]
name = "nom"
@ -3916,6 +4021,16 @@ dependencies = [
"winapi",
]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
dependencies = [
"overload",
"winapi",
]
[[package]]
name = "num-bigint"
version = "0.4.3"
@ -4037,6 +4152,18 @@ dependencies = [
"num-traits",
]
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "owo-colors"
version = "3.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f"
[[package]]
name = "page_size"
version = "0.5.0"
@ -4125,12 +4252,19 @@ dependencies = [
]
[[package]]
name = "pem"
version = "1.1.1"
name = "peeking_take_while"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8835c273a76a90455d7344889b0964598e3316e2a79ede8e36f16bdcf2228b8"
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]]
name = "pem"
version = "3.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310"
dependencies = [
"base64 0.13.1",
"base64 0.21.7",
"serde",
]
[[package]]
@ -4141,7 +4275,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "permissive-json-pointer"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"big_s",
"serde_json",
@ -4243,6 +4377,26 @@ dependencies = [
"siphasher 0.3.11",
]
[[package]]
name = "pin-project"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0302c4a0442c456bd56f841aee5c3bfd17967563f6fadc9ceb9f9c23cf3807e0"
dependencies = [
"pin-project-internal",
]
[[package]]
name = "pin-project-internal"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.48",
]
[[package]]
name = "pin-project-lite"
version = "0.2.13"
@ -4641,20 +4795,20 @@ dependencies = [
"once_cell",
"percent-encoding",
"pin-project-lite",
"rustls 0.21.6",
"rustls",
"rustls-pemfile",
"serde",
"serde_json",
"serde_urlencoded",
"system-configuration",
"tokio",
"tokio-rustls 0.24.1",
"tokio-rustls",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
"webpki-roots 0.25.3",
"webpki-roots",
"winreg",
]
@ -4672,30 +4826,15 @@ checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
[[package]]
name = "ring"
version = "0.16.20"
version = "0.17.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc"
dependencies = [
"cc",
"libc",
"once_cell",
"spin 0.5.2",
"untrusted 0.7.1",
"web-sys",
"winapi",
]
[[package]]
name = "ring"
version = "0.17.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9babe80d5c16becf6594aa32ad2be8fe08498e7ae60b77de8df700e67f191d7e"
checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74"
dependencies = [
"cc",
"getrandom",
"libc",
"spin 0.9.8",
"untrusted 0.9.0",
"spin",
"untrusted",
"windows-sys 0.48.0",
]
@ -4773,24 +4912,12 @@ dependencies = [
[[package]]
name = "rustls"
version = "0.20.9"
version = "0.21.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b80e3dec595989ea8510028f30c408a4630db12c9cbb8de34203b89d6577e99"
checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba"
dependencies = [
"log",
"ring 0.16.20",
"sct",
"webpki",
]
[[package]]
name = "rustls"
version = "0.21.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d1feddffcfcc0b33f5c6ce9a29e341e4cd59c3f78e7ee45f4a40c038b1d6cbb"
dependencies = [
"log",
"ring 0.16.20",
"ring",
"rustls-webpki",
"sct",
]
@ -4810,8 +4937,8 @@ version = "0.101.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
dependencies = [
"ring 0.17.3",
"untrusted 0.9.0",
"ring",
"untrusted",
]
[[package]]
@ -4853,12 +4980,12 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "sct"
version = "0.7.0"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4"
checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
dependencies = [
"ring 0.16.20",
"untrusted 0.7.1",
"ring",
"untrusted",
]
[[package]]
@ -4994,6 +5121,21 @@ dependencies = [
"digest",
]
[[package]]
name = "sharded-slab"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
dependencies = [
"lazy_static",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "signal-hook-registry"
version = "1.4.1"
@ -5109,12 +5251,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "spin"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
[[package]]
name = "spin"
version = "0.9.8"
@ -5326,6 +5462,16 @@ dependencies = [
"syn 2.0.48",
]
[[package]]
name = "thread_local"
version = "1.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]]
name = "tiktoken-rs"
version = "0.5.8"
@ -5466,24 +5612,13 @@ dependencies = [
"syn 2.0.48",
]
[[package]]
name = "tokio-rustls"
version = "0.23.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59"
dependencies = [
"rustls 0.20.9",
"tokio",
"webpki",
]
[[package]]
name = "tokio-rustls"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
dependencies = [
"rustls 0.21.6",
"rustls",
"tokio",
]
@ -5554,17 +5689,29 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
[[package]]
name = "tracing"
version = "0.1.37"
version = "0.1.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef"
dependencies = [
"cfg-if",
"log",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
]
[[package]]
name = "tracing-actix-web"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fe0d5feac3f4ca21ba33496bcb1ccab58cca6412b1405ae80f0581541e0ca78"
dependencies = [
"actix-web",
"mutually_exclusive_features",
"pin-project",
"tracing",
"uuid",
]
[[package]]
name = "tracing-attributes"
version = "0.1.27"
@ -5578,11 +5725,63 @@ dependencies = [
[[package]]
name = "tracing-core"
version = "0.1.31"
version = "0.1.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a"
checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
dependencies = [
"once_cell",
"valuable",
]
[[package]]
name = "tracing-error"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e"
dependencies = [
"tracing",
"tracing-subscriber",
]
[[package]]
name = "tracing-log"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"log",
"once_cell",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
dependencies = [
"nu-ansi-term",
"sharded-slab",
"smallvec",
"thread_local",
"tracing-core",
"tracing-log",
]
[[package]]
name = "tracing-trace"
version = "0.1.0"
dependencies = [
"byte-unit",
"color-spantrace",
"fxprof-processed-profile",
"libproc",
"serde",
"serde_json",
"tokio",
"tracing",
"tracing-error",
"tracing-subscriber",
]
[[package]]
@ -5675,12 +5874,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "untrusted"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
[[package]]
name = "untrusted"
version = "0.9.0"
@ -5697,13 +5890,13 @@ dependencies = [
"flate2",
"log",
"once_cell",
"rustls 0.21.6",
"rustls",
"rustls-webpki",
"serde",
"serde_json",
"socks",
"url",
"webpki-roots 0.25.3",
"webpki-roots",
]
[[package]]
@ -5758,6 +5951,12 @@ dependencies = [
"serde",
]
[[package]]
name = "valuable"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]]
name = "vcpkg"
version = "0.2.15"
@ -5907,25 +6106,6 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "webpki"
version = "0.22.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07ecc0cd7cac091bf682ec5efa18b1cff79d617b84181f38b3951dbe135f607f"
dependencies = [
"ring 0.16.20",
"untrusted 0.7.1",
]
[[package]]
name = "webpki-roots"
version = "0.22.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87"
dependencies = [
"webpki",
]
[[package]]
name = "webpki-roots"
version = "0.25.3"
@ -6232,7 +6412,7 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.6.1"
version = "1.7.0"
dependencies = [
"cargo_metadata",
"clap",
@ -6287,6 +6467,26 @@ dependencies = [
"synstructure",
]
[[package]]
name = "zerocopy"
version = "0.7.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.48",
]
[[package]]
name = "zerofrom"
version = "0.1.3"

View File

@ -16,12 +16,16 @@ members = [
"json-depth-checker",
"benchmarks",
"fuzzers",
"tracing-trace",
"xtask",
]
[workspace.package]
version = "1.6.1"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
version = "1.7.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",
]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"
readme = "README.md"

View File

@ -41,10 +41,10 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
## ✨ Features
- **Search-as-you-type:** find search results in less than 50 milliseconds
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results
- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
@ -61,8 +61,6 @@ You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) for an introduction to some of Meilisearch's most popular features.
## ⚡ Supercharge your Meilisearch experience
Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required.
@ -101,7 +99,7 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle
- For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions)
- Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)!
- Want to be part of our Discord community? [Join us!](https://discord.gg/meilisearch)
- Want to be part of our Discord community? [Join us!](https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
Thank you for your support!

View File

@ -14,7 +14,6 @@ license.workspace = true
anyhow = "1.0.79"
flate2 = "1.0.28"
http = "0.2.11"
log = "0.4.20"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.19.0"
@ -26,6 +25,7 @@ tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.40"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies]

View File

@ -120,7 +120,7 @@ impl From<v1::settings::Settings> for v2::Settings<v2::Unchecked> {
criterion.as_ref().map(ToString::to_string)
}
Err(()) => {
log::warn!(
tracing::warn!(
"Could not import the following ranking rule: `{}`.",
ranking_rule
);
@ -152,11 +152,11 @@ impl From<v1::update::UpdateStatus> for Option<v2::updates::UpdateStatus> {
use v2::updates::UpdateStatus as UpdateStatusV2;
Some(match source {
UpdateStatusV1::Enqueued { content } => {
log::warn!(
tracing::warn!(
"Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)",
content.update_id
);
log::warn!("Task will be skipped in the queue of imported tasks.");
tracing::warn!("Task will be skipped in the queue of imported tasks.");
return None;
}
@ -229,7 +229,7 @@ impl From<v1::update::UpdateType> for Option<v2::updates::UpdateMeta> {
Some(match source {
v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments,
v1::update::UpdateType::Customs => {
log::warn!("Ignoring task with type 'Customs' that is no longer supported");
tracing::warn!("Ignoring task with type 'Customs' that is no longer supported");
return None;
}
v1::update::UpdateType::DocumentsAddition { .. } => {
@ -296,7 +296,7 @@ impl From<v1::settings::RankingRule> for Option<v2::settings::Criterion> {
v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity),
v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute),
v1::settings::RankingRule::WordsPosition => {
log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
tracing::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
None
}
v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness),

View File

@ -146,8 +146,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
started_processing_at: processing.started_processing_at,
}),
Err(e) => {
log::warn!("Error with task {}: {}", processing.from.update_id, e);
log::warn!("Task will be marked as `Failed`.");
tracing::warn!("Error with task {}: {}", processing.from.update_id, e);
tracing::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing {
from: v3::updates::Enqueued {
@ -172,8 +172,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
enqueued_at: enqueued.enqueued_at,
}),
Err(e) => {
log::warn!("Error with task {}: {}", enqueued.update_id, e);
log::warn!("Task will be marked as `Failed`.");
tracing::warn!("Error with task {}: {}", enqueued.update_id, e);
tracing::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing {
from: v3::updates::Enqueued {
@ -353,7 +353,7 @@ impl From<String> for v3::Code {
"malformed_payload" => v3::Code::MalformedPayload,
"missing_payload" => v3::Code::MissingPayload,
other => {
log::warn!("Unknown error code {}", other);
tracing::warn!("Unknown error code {}", other);
v3::Code::UnretrievableErrorCode
}
}

View File

@ -76,20 +76,20 @@ impl CompatV3ToV4 {
let index_uid = match index_uid {
Some(uid) => uid,
None => {
log::warn!(
tracing::warn!(
"Error while importing the update {}.",
task.update.id()
);
log::warn!(
tracing::warn!(
"The index associated to the uuid `{}` could not be retrieved.",
task.uuid.to_string()
);
if task.update.is_finished() {
// we're fucking with his history but not his data, that's ok-ish.
log::warn!("The index-uuid will be set as `unknown`.");
tracing::warn!("The index-uuid will be set as `unknown`.");
String::from("unknown")
} else {
log::warn!("The task will be ignored.");
tracing::warn!("The task will be ignored.");
return None;
}
}

View File

@ -305,7 +305,7 @@ impl From<v4::ResponseError> for v5::ResponseError {
"invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt,
"invalid_api_key_description" => v5::Code::InvalidApiKeyDescription,
other => {
log::warn!("Unknown error code {}", other);
tracing::warn!("Unknown error code {}", other);
v5::Code::UnretrievableErrorCode
}
};

View File

@ -304,7 +304,7 @@ impl From<v5::ResponseError> for v6::ResponseError {
"immutable_field" => v6::Code::BadRequest,
"api_key_already_exists" => v6::Code::ApiKeyAlreadyExists,
other => {
log::warn!("Unknown error code {}", other);
tracing::warn!("Unknown error code {}", other);
v6::Code::UnretrievableErrorCode
}
};
@ -329,7 +329,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
new_ranking_rules.push(new_rule);
}
Err(_) => {
log::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
tracing::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
}
}
}

View File

@ -2,10 +2,10 @@ use std::fs::{self, File};
use std::io::{BufRead, BufReader, ErrorKind};
use std::path::Path;
use log::debug;
pub use meilisearch_types::milli;
use tempfile::TempDir;
use time::OffsetDateTime;
use tracing::debug;
use uuid::Uuid;
use super::Document;

View File

@ -1,5 +1,5 @@
use std::fs::File as StdFile;
use std::ops::{Deref, DerefMut};
use std::io::Write;
use std::path::{Path, PathBuf};
use std::str::FromStr;
@ -22,20 +22,6 @@ pub enum Error {
pub type Result<T> = std::result::Result<T, Error>;
impl Deref for File {
type Target = NamedTempFile;
fn deref(&self) -> &Self::Target {
&self.file
}
}
impl DerefMut for File {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.file
}
}
#[derive(Clone, Debug)]
pub struct FileStore {
path: PathBuf,
@ -56,7 +42,7 @@ impl FileStore {
let file = NamedTempFile::new_in(&self.path)?;
let uuid = Uuid::new_v4();
let path = self.path.join(uuid.to_string());
let update_file = File { file, path };
let update_file = File { file: Some(file), path };
Ok((uuid, update_file))
}
@ -67,7 +53,7 @@ impl FileStore {
let file = NamedTempFile::new_in(&self.path)?;
let uuid = Uuid::from_u128(uuid);
let path = self.path.join(uuid.to_string());
let update_file = File { file, path };
let update_file = File { file: Some(file), path };
Ok((uuid, update_file))
}
@ -136,16 +122,40 @@ impl FileStore {
pub struct File {
path: PathBuf,
file: NamedTempFile,
file: Option<NamedTempFile>,
}
impl File {
pub fn dry_file() -> Result<Self> {
Ok(Self { path: PathBuf::new(), file: None })
}
pub fn persist(self) -> Result<()> {
self.file.persist(&self.path)?;
if let Some(file) = self.file {
file.persist(&self.path)?;
}
Ok(())
}
}
impl Write for File {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
if let Some(file) = self.file.as_mut() {
file.write(buf)
} else {
Ok(buf.len())
}
}
fn flush(&mut self) -> std::io::Result<()> {
if let Some(file) = self.file.as_mut() {
file.flush()
} else {
Ok(())
}
}
}
#[cfg(test)]
mod test {
use std::io::Write;

View File

@ -19,7 +19,6 @@ dump = { path = "../dump" }
enum-iterator = "1.5.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
log = "0.4.20"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
@ -30,7 +29,13 @@ serde_json = { version = "1.0.111", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
time = { version = "0.3.31", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tracing = "0.1.40"
ureq = "2.9.1"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
@ -39,4 +44,3 @@ big_s = "1.0.2"
crossbeam = "0.8.4"
insta = { version = "1.34.0", features = ["json", "redactions"] }
meili-snap = { path = "../meili-snap" }
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}

View File

@ -24,7 +24,6 @@ use std::fs::{self, File};
use std::io::BufWriter;
use dump::IndexMetadata;
use log::{debug, error, info, trace};
use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
@ -514,6 +513,7 @@ impl IndexScheduler {
/// 3. We get the *next* snapshot to process.
/// 4. We get the *next* dump to process.
/// 5. We get the *next* tasks to process for a specific index.
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result<Option<Batch>> {
#[cfg(test)]
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
@ -619,6 +619,7 @@ impl IndexScheduler {
/// The list of tasks that were processed. The metadata of each task in the returned
/// list is updated accordingly, with the exception of the its date fields
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
#[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))]
pub(crate) fn process_batch(&self, batch: Batch) -> Result<Vec<Task>> {
#[cfg(test)]
{
@ -668,9 +669,10 @@ impl IndexScheduler {
Ok(()) => {
for content_uuid in canceled_tasks_content_uuids {
if let Err(error) = self.delete_update_file(content_uuid) {
error!(
"We failed deleting the content file indentified as {}: {}",
content_uuid, error
tracing::error!(
file_content_uuid = %content_uuid,
%error,
"Failed deleting content file"
)
}
}
@ -969,7 +971,10 @@ impl IndexScheduler {
match res {
Ok(_) => (),
Err(e) => error!("Could not write the stats of the index {}", e),
Err(e) => tracing::error!(
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
),
}
Ok(tasks)
@ -997,7 +1002,7 @@ impl IndexScheduler {
builder.set_primary_key(primary_key);
let must_stop_processing = self.must_stop_processing.clone();
builder.execute(
|indexing_step| debug!("update: {:?}", indexing_step),
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
)?;
index_wtxn.commit()?;
@ -1024,7 +1029,10 @@ impl IndexScheduler {
match res {
Ok(_) => (),
Err(e) => error!("Could not write the stats of the index {}", e),
Err(e) => tracing::error!(
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
),
}
Ok(vec![task])
@ -1143,6 +1151,11 @@ impl IndexScheduler {
///
/// ## Return
/// The list of processed tasks.
#[tracing::instrument(
level = "trace",
skip(self, index_wtxn, index),
target = "indexing::scheduler"
)]
fn apply_index_operation<'i>(
&self,
index_wtxn: &mut RwTxn<'i>,
@ -1203,7 +1216,7 @@ impl IndexScheduler {
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.set_primary_key(primary_key);
builder.execute(
|indexing_step| debug!("update: {:?}", indexing_step),
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.clone().get(),
)?;
primary_key_has_been_set = true;
@ -1222,7 +1235,7 @@ impl IndexScheduler {
index,
indexer_config,
config,
|indexing_step| trace!("update: {:?}", indexing_step),
|indexing_step| tracing::trace!(?indexing_step, "Update"),
|| must_stop_processing.get(),
)?;
@ -1294,7 +1307,7 @@ impl IndexScheduler {
if !tasks.iter().all(|res| res.error.is_some()) {
let addition = builder.execute()?;
info!("document addition done: {:?}", addition);
tracing::info!(indexing_result = ?addition, "document indexing done");
} else if primary_key_has_been_set {
// Everything failed but we've set a primary key.
// We need to remove it.
@ -1302,7 +1315,7 @@ impl IndexScheduler {
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.reset_primary_key();
builder.execute(
|indexing_step| trace!("update: {:?}", indexing_step),
|indexing_step| tracing::trace!(update = ?indexing_step),
|| must_stop_processing.clone().get(),
)?;
}
@ -1372,7 +1385,7 @@ impl IndexScheduler {
let must_stop_processing = self.must_stop_processing.clone();
builder.execute(
|indexing_step| debug!("update: {:?}", indexing_step),
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
)?;
@ -1584,7 +1597,7 @@ fn delete_document_by_filter<'a>(
index,
indexer_config,
config,
|indexing_step| debug!("update: {:?}", indexing_step),
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
)?;

View File

@ -48,6 +48,8 @@ impl From<DateField> for Code {
pub enum Error {
#[error("{1}")]
WithCustomErrorCode(Code, Box<Self>),
#[error("Received bad task id: {received} should be >= to {expected}.")]
BadTaskId { received: TaskId, expected: TaskId },
#[error("Index `{0}` not found.")]
IndexNotFound(String),
#[error("Index `{0}` already exists.")]
@ -161,6 +163,7 @@ impl Error {
match self {
Error::IndexNotFound(_)
| Error::WithCustomErrorCode(_, _)
| Error::BadTaskId { .. }
| Error::IndexAlreadyExists(_)
| Error::SwapDuplicateIndexFound(_)
| Error::SwapDuplicateIndexesFound(_)
@ -205,6 +208,7 @@ impl ErrorCode for Error {
fn error_code(&self) -> Code {
match self {
Error::WithCustomErrorCode(code, _) => *code,
Error::BadTaskId { .. } => Code::BadRequest,
Error::IndexNotFound(_) => Code::IndexNotFound,
Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound,

View File

@ -30,19 +30,6 @@ impl RoFeatures {
self.runtime
}
pub fn check_score_details(&self) -> Result<()> {
if self.runtime.score_details {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Computing score details",
feature: "score details",
issue_link: "https://github.com/meilisearch/product/discussions/674",
}
.into())
}
}
pub fn check_metrics(&self) -> Result<()> {
if self.runtime.metrics {
Ok(())
@ -56,6 +43,19 @@ impl RoFeatures {
}
}
pub fn check_logs_route(&self) -> Result<()> {
if self.runtime.logs_route {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "getting logs through the `/logs/stream` route",
feature: "logs route",
issue_link: "https://github.com/orgs/meilisearch/discussions/721",
}
.into())
}
}
pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.vector_store {
Ok(())
@ -94,6 +94,7 @@ impl FeatureData {
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: instance_features.metrics || persisted_features.metrics,
logs_route: instance_features.logs_route || persisted_features.logs_route,
..persisted_features
}));

View File

@ -3,13 +3,13 @@ use std::sync::{Arc, RwLock};
use std::time::Duration;
use std::{fs, thread};
use log::error;
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{FieldDistribution, Index};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use tracing::error;
use uuid::Uuid;
use self::index_map::IndexMap;

View File

@ -15,6 +15,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
let IndexScheduler {
autobatching_enabled,
cleanup_enabled: _,
must_stop_processing: _,
processing_tasks,
file_store,

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,90 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"uid": 0,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "succeeded",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 1,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": {
"message": "Index `doggo` already exists.",
"code": "index_already_exists",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_already_exists"
},
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "failed",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 2,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 3,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
}
]

View File

@ -0,0 +1,90 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"uid": 0,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "succeeded",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 1,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": {
"message": "Index `doggo` already exists.",
"code": "index_already_exists",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_already_exists"
},
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "failed",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 2,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 3,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
}
]

View File

@ -11,7 +11,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
actix-web = { version = "4.4.1", default-features = false }
actix-web = { version = "4.5.1", default-features = false }
anyhow = "1.0.79"
convert_case = "0.6.0"
csv = "1.3.0"

View File

@ -1,6 +1,6 @@
use std::fmt::{self, Debug, Display};
use std::fs::File;
use std::io::{self, Seek, Write};
use std::io::{self, BufWriter, Write};
use std::marker::PhantomData;
use memmap2::MmapOptions;
@ -104,8 +104,8 @@ impl ErrorCode for DocumentFormatError {
}
/// Reads CSV from input and write an obkv batch to writer.
pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer);
pub fn read_csv(file: &File, writer: impl Write, delimiter: u8) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
let mmap = unsafe { MmapOptions::new().map(file)? };
let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?;
@ -116,9 +116,9 @@ pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result
Ok(count as u64)
}
/// Reads JSON from temporary file and write an obkv batch to writer.
pub fn read_json(file: &File, writer: impl Write + Seek) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer);
/// Reads JSON from temporary file and write an obkv batch to writer.
pub fn read_json(file: &File, writer: impl Write) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
let mmap = unsafe { MmapOptions::new().map(file)? };
let mut deserializer = serde_json::Deserializer::from_slice(&mmap);
@ -151,8 +151,8 @@ pub fn read_json(file: &File, writer: impl Write + Seek) -> Result<u64> {
}
/// Reads JSON from temporary file and write an obkv batch to writer.
pub fn read_ndjson(file: &File, writer: impl Write + Seek) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer);
pub fn read_ndjson(file: &File, writer: impl Write) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
let mmap = unsafe { MmapOptions::new().map(file)? };
for result in serde_json::Deserializer::from_slice(&mmap).into_iter() {

View File

@ -310,6 +310,8 @@ TooManyVectors , InvalidRequest , BAD_REQUEST ;
UnretrievableDocument , Internal , BAD_REQUEST ;
UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
// Experimental features
VectorEmbeddingError , InvalidRequest , BAD_REQUEST
}
@ -347,6 +349,9 @@ impl ErrorCode for milli::Error {
UserError::InvalidFieldForSource { .. }
| UserError::MissingFieldForSource { .. }
| UserError::InvalidOpenAiModel { .. }
| UserError::InvalidOpenAiModelDimensions { .. }
| UserError::InvalidOpenAiModelDimensionsMax { .. }
| UserError::InvalidSettingsDimensions { .. }
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,

View File

@ -3,13 +3,14 @@ use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
#[serde(rename_all = "camelCase", default)]
pub struct RuntimeTogglableFeatures {
pub score_details: bool,
pub vector_store: bool,
pub metrics: bool,
pub logs_route: bool,
pub export_puffin_reports: bool,
}
#[derive(Default, Debug, Clone, Copy)]
pub struct InstanceTogglableFeatures {
pub metrics: bool,
pub logs_route: bool,
}

View File

@ -14,18 +14,18 @@ default-run = "meilisearch"
[dependencies]
actix-cors = "0.7.0"
actix-http = { version = "3.5.1", default-features = false, features = [
actix-http = { version = "3.6.0", default-features = false, features = [
"compress-brotli",
"compress-gzip",
"rustls",
"rustls-0_21",
] }
actix-utils = "3.0.1"
actix-web = { version = "4.4.1", default-features = false, features = [
actix-web = { version = "4.5.1", default-features = false, features = [
"macros",
"compress-brotli",
"compress-gzip",
"cookies",
"rustls",
"rustls-0_21",
] }
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] }
@ -42,7 +42,6 @@ crossbeam-channel = "0.5.11"
deserr = { version = "0.6.1", features = ["actix-web"] }
dump = { path = "../dump" }
either = "1.9.0"
env_logger = "0.10.1"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
fst = "0.4.7"
@ -53,9 +52,8 @@ index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "2.1.0", features = ["serde"] }
is-terminal = "0.4.10"
itertools = "0.11.0"
jsonwebtoken = "8.3.0"
jsonwebtoken = "9.2.0"
lazy_static = "1.4.0"
log = "0.4.20"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.39", default-features = false }
@ -77,7 +75,7 @@ reqwest = { version = "0.11.23", features = [
"rustls-tls",
"json",
], default-features = false }
rustls = "0.20.8"
rustls = "0.21.6"
rustls-pemfile = "1.0.2"
segment = { version = "0.2.3", optional = true }
serde = { version = "1.0.195", features = ["derive"] }
@ -105,6 +103,10 @@ yaup = "0.2.1"
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
url = { version = "2.5.0", features = ["serde"] }
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.9"
[dev-dependencies]
actix-rt = "2.9.0"

View File

@ -250,6 +250,8 @@ impl super::Analytics for SegmentAnalytics {
struct Infos {
env: String,
experimental_enable_metrics: bool,
experimental_replication_parameters: bool,
experimental_enable_logs_route: bool,
experimental_reduce_indexing_memory_usage: bool,
experimental_max_number_of_batched_tasks: usize,
db_path: bool,
@ -287,6 +289,8 @@ impl From<Opt> for Infos {
let Opt {
db_path,
experimental_enable_metrics,
experimental_replication_parameters,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks,
http_addr,
@ -333,6 +337,8 @@ impl From<Opt> for Infos {
Self {
env,
experimental_enable_metrics,
experimental_replication_parameters,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),

View File

@ -12,6 +12,8 @@ pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
MissingContentType(Vec<String>),
#[error("The `/logs/stream` route is currently in use by someone else.")]
AlreadyUsedLogRoute,
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
CsvDelimiterWithWrongContentType(String),
#[error(
@ -59,6 +61,7 @@ impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code {
match self {
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
MeilisearchHttpError::AlreadyUsedLogRoute => Code::BadRequest,
MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,

View File

@ -131,6 +131,7 @@ gen_seq! { SeqFromRequestFut3; A B C }
gen_seq! { SeqFromRequestFut4; A B C D }
gen_seq! { SeqFromRequestFut5; A B C D E }
gen_seq! { SeqFromRequestFut6; A B C D E F }
gen_seq! { SeqFromRequestFut7; A B C D E F G }
pin_project! {
#[project = ExtractProj]

View File

@ -29,7 +29,6 @@ use error::PayloadError;
use extractors::payload::PayloadConfig;
use http::header::CONTENT_TYPE;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use log::error;
use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
@ -39,6 +38,8 @@ use meilisearch_types::versioning::{check_version_file, create_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
pub use option::Opt;
use option::ScheduleSnapshot;
use tracing::{error, info_span};
use tracing_subscriber::filter::Targets;
use crate::error::MeilisearchHttpError;
@ -86,10 +87,21 @@ fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
}
}
/// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`.
pub type LogRouteHandle =
tracing_subscriber::reload::Handle<LogRouteType, tracing_subscriber::Registry>;
pub type LogRouteType = tracing_subscriber::filter::Filtered<
Option<Box<dyn tracing_subscriber::Layer<tracing_subscriber::Registry> + Send + Sync>>,
Targets,
tracing_subscriber::Registry,
>;
pub fn create_app(
index_scheduler: Data<IndexScheduler>,
auth_controller: Data<AuthController>,
opt: Opt,
logs: LogRouteHandle,
analytics: Arc<dyn Analytics>,
enable_dashboard: bool,
) -> actix_web::App<
@ -108,6 +120,7 @@ pub fn create_app(
index_scheduler.clone(),
auth_controller.clone(),
&opt,
logs,
analytics.clone(),
)
})
@ -123,11 +136,49 @@ pub fn create_app(
.allow_any_method()
.max_age(86_400), // 24h
)
.wrap(actix_web::middleware::Logger::default())
.wrap(tracing_actix_web::TracingLogger::<AwebTracingLogger>::new())
.wrap(actix_web::middleware::Compress::default())
.wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
}
struct AwebTracingLogger;
impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
fn on_request_start(request: &actix_web::dev::ServiceRequest) -> tracing::Span {
use tracing::field::Empty;
let conn_info = request.connection_info();
let headers = request.headers();
let user_agent = headers
.get(http::header::USER_AGENT)
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
.unwrap_or_default();
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
}
fn on_request_end<B: MessageBody>(
span: tracing::Span,
outcome: &Result<ServiceResponse<B>, actix_web::Error>,
) {
match &outcome {
Ok(response) => {
let code: i32 = response.response().status().as_u16().into();
span.record("status_code", code);
if let Some(error) = response.response().error() {
// use the status code already constructed for the outgoing HTTP response
span.record("error", &tracing::field::display(error.as_response_error()));
}
}
Err(error) => {
let code: i32 = error.error_response().status().as_u16().into();
span.record("status_code", code);
span.record("error", &tracing::field::display(error.as_response_error()));
}
};
}
}
enum OnFailure {
RemoveDb,
KeepDb,
@ -200,7 +251,9 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
.name(String::from("register-snapshot-tasks"))
.spawn(move || loop {
thread::sleep(snapshot_delay);
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
if let Err(e) =
index_scheduler.register(KindWithContent::SnapshotCreation, None, false)
{
error!("Error while registering snapshot: {}", e);
}
})
@ -235,6 +288,7 @@ fn open_or_create_database_unchecked(
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
@ -280,15 +334,15 @@ fn import_dump(
let mut dump_reader = dump::DumpReader::open(reader)?;
if let Some(date) = dump_reader.date() {
log::info!(
"Importing a dump of meilisearch `{:?}` from the {}",
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
date
tracing::info!(
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
%date,
"Importing a dump of meilisearch"
);
} else {
log::info!(
"Importing a dump of meilisearch `{:?}`",
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
tracing::info!(
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
"Importing a dump of meilisearch",
);
}
@ -322,7 +376,7 @@ fn import_dump(
for index_reader in dump_reader.indexes()? {
let mut index_reader = index_reader?;
let metadata = index_reader.metadata();
log::info!("Importing index `{}`.", metadata.uid);
tracing::info!("Importing index `{}`.", metadata.uid);
let date = Some((metadata.created_at, metadata.updated_at));
let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
@ -336,14 +390,15 @@ fn import_dump(
}
// 4.2 Import the settings.
log::info!("Importing the settings.");
tracing::info!("Importing the settings.");
let settings = index_reader.settings()?;
apply_settings_to_builder(&settings, &mut builder);
builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?;
builder
.execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
// 4.3 Import the documents.
// 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
log::info!("Importing the documents.");
tracing::info!("Importing the documents.");
let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
for document in index_reader.documents()? {
@ -365,15 +420,16 @@ fn import_dump(
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| log::trace!("update: {:?}", indexing_step),
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|| false,
)?;
let (builder, user_result) = builder.add_documents(reader)?;
log::info!("{} documents found.", user_result?);
let user_result = user_result?;
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
builder.execute()?;
wtxn.commit()?;
log::info!("All documents successfully imported.");
tracing::info!("All documents successfully imported.");
}
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
@ -391,6 +447,7 @@ pub fn configure_data(
index_scheduler: Data<IndexScheduler>,
auth: Data<AuthController>,
opt: &Opt,
logs: LogRouteHandle,
analytics: Arc<dyn Analytics>,
) {
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
@ -398,6 +455,8 @@ pub fn configure_data(
.app_data(index_scheduler)
.app_data(auth)
.app_data(web::Data::from(analytics))
.app_data(web::Data::new(logs))
.app_data(web::Data::new(opt.clone()))
.app_data(
web::JsonConfig::default()
.limit(http_payload_size_limit)

View File

@ -1,6 +1,7 @@
use std::env;
use std::io::{stderr, Write};
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use actix_web::http::KeepAlive;
@ -9,37 +10,60 @@ use actix_web::HttpServer;
use index_scheduler::IndexScheduler;
use is_terminal::IsTerminal;
use meilisearch::analytics::Analytics;
use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt};
use meilisearch::{
analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, LogRouteType, Opt,
};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use mimalloc::MiMalloc;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_subscriber::Layer;
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
static ALLOC: MiMalloc = MiMalloc;
fn default_layer() -> LogRouteType {
None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF))
}
/// does all the setup before meilisearch is launched
fn setup(opt: &Opt) -> anyhow::Result<()> {
let mut log_builder = env_logger::Builder::new();
let log_filters = format!(
"{},h2=warn,hyper=warn,tokio_util=warn,tracing=warn,rustls=warn,mio=warn,reqwest=warn",
opt.log_level
fn setup(opt: &Opt) -> anyhow::Result<LogRouteHandle> {
let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(default_layer());
let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer;
let subscriber = tracing_subscriber::registry().with(route_layer).with(
tracing_subscriber::fmt::layer()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE)
.with_filter(
tracing_subscriber::filter::LevelFilter::from_str(&opt.log_level.to_string())
.unwrap(),
),
);
log_builder.parse_filters(&log_filters);
log_builder.init();
// set the subscriber as the default for the application
tracing::subscriber::set_global_default(subscriber).unwrap();
Ok(())
Ok(route_layer_handle)
}
fn on_panic(info: &std::panic::PanicInfo) {
let info = info.to_string().replace('\n', " ");
tracing::error!(%info);
}
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
std::panic::set_hook(Box::new(on_panic));
anyhow::ensure!(
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
);
setup(&opt)?;
let log_handle = setup(&opt)?;
match (opt.env.as_ref(), &opt.master_key) {
("production", Some(master_key)) if master_key.len() < MASTER_KEY_MIN_SIZE => {
@ -77,7 +101,7 @@ async fn main() -> anyhow::Result<()> {
print_launch_resume(&opt, analytics.clone(), config_read_from);
run_http(index_scheduler, auth_controller, opt, analytics).await?;
run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?;
Ok(())
}
@ -86,6 +110,7 @@ async fn run_http(
index_scheduler: Arc<IndexScheduler>,
auth_controller: Arc<AuthController>,
opt: Opt,
logs: LogRouteHandle,
analytics: Arc<dyn Analytics>,
) -> anyhow::Result<()> {
let enable_dashboard = &opt.env == "development";
@ -98,6 +123,7 @@ async fn run_http(
index_scheduler.clone(),
auth_controller.clone(),
opt.clone(),
logs.clone(),
analytics.clone(),
enable_dashboard,
)
@ -107,7 +133,7 @@ async fn run_http(
.keep_alive(KeepAlive::Os);
if let Some(config) = opt_clone.get_ssl_config()? {
http_server.bind_rustls(opt_clone.http_addr, config)?.run().await?;
http_server.bind_rustls_021(opt_clone.http_addr, config)?.run().await?;
} else {
http_server.bind(&opt_clone.http_addr)?.run().await?;
}

View File

@ -51,6 +51,8 @@ const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP";
const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS";
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
@ -309,6 +311,23 @@ pub struct Opt {
#[serde(default)]
pub experimental_enable_metrics: bool,
/// Experimental logs route feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/721>
///
/// Enables the log route on the `POST /logs/stream` endpoint and the `DELETE /logs/stream` to stop receiving logs.
#[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)]
#[serde(default)]
pub experimental_enable_logs_route: bool,
/// Enable multiple features that helps you to run meilisearch in a replicated context.
/// For more information, see: <https://github.com/orgs/meilisearch/discussions/725>
///
/// - /!\ Disable the automatic clean up of old processed tasks, you're in charge of that now
/// - Lets you specify a custom task ID upon registering a task
/// - Lets you execute dry-register a task (get an answer from the route but nothing is actually registered in meilisearch and it won't be processed)
#[clap(long, env = MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS)]
#[serde(default)]
pub experimental_replication_parameters: bool,
/// Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
#[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)]
#[serde(default)]
@ -414,6 +433,8 @@ impl Opt {
#[cfg(feature = "analytics")]
no_analytics,
experimental_enable_metrics,
experimental_enable_logs_route,
experimental_replication_parameters,
experimental_reduce_indexing_memory_usage,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
@ -470,6 +491,14 @@ impl Opt {
MEILI_EXPERIMENTAL_ENABLE_METRICS,
experimental_enable_metrics.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS,
experimental_replication_parameters.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE,
experimental_enable_logs_route.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
experimental_reduce_indexing_memory_usage.to_string(),
@ -490,11 +519,11 @@ impl Opt {
}
if self.ssl_require_auth {
let verifier = AllowAnyAuthenticatedClient::new(client_auth_roots);
config.with_client_cert_verifier(verifier)
config.with_client_cert_verifier(Arc::from(verifier))
} else {
let verifier =
AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots);
config.with_client_cert_verifier(verifier)
config.with_client_cert_verifier(Arc::from(verifier))
}
}
None => config.with_no_client_auth(),
@ -524,7 +553,10 @@ impl Opt {
}
pub(crate) fn to_instance_features(&self) -> InstanceTogglableFeatures {
InstanceTogglableFeatures { metrics: self.experimental_enable_metrics }
InstanceTogglableFeatures {
metrics: self.experimental_enable_metrics,
logs_route: self.experimental_enable_logs_route,
}
}
}

View File

@ -1,17 +1,18 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::SummarizedTaskView;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
@ -21,6 +22,7 @@ pub async fn create_dump(
index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<AuthController>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Dump Created".to_string(), json!({}), Some(&req));
@ -29,9 +31,13 @@ pub async fn create_dump(
keys: auth_controller.list_keys()?,
instance_uid: analytics.instance_uid().cloned(),
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!("returns: {:?}", task);
debug!(returns = ?task, "Create dump");
Ok(HttpResponse::Accepted().json(task))
}

View File

@ -3,11 +3,11 @@ use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::Deserr;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::ActionPolicy;
@ -33,20 +33,21 @@ async fn get_features(
let features = index_scheduler.features();
analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
debug!("returns: {:?}", features.runtime_features());
HttpResponse::Ok().json(features.runtime_features())
let features = features.runtime_features();
debug!(returns = ?features, "Get features");
HttpResponse::Ok().json(features)
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct RuntimeTogglableFeatures {
#[deserr(default)]
pub score_details: Option<bool>,
#[deserr(default)]
pub vector_store: Option<bool>,
#[deserr(default)]
pub metrics: Option<bool>,
#[deserr(default)]
pub logs_route: Option<bool>,
#[deserr(default)]
pub export_puffin_reports: Option<bool>,
}
@ -60,12 +61,13 @@ async fn patch_features(
analytics: Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features();
debug!(parameters = ?new_features, "Patch features");
let old_features = features.runtime_features();
let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
export_puffin_reports: new_features
.0
.export_puffin_reports
@ -76,22 +78,23 @@ async fn patch_features(
// the it renames to camelCase, which we don't want for analytics.
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
let meilisearch_types::features::RuntimeTogglableFeatures {
score_details,
vector_store,
metrics,
logs_route,
export_puffin_reports,
} = new_features;
analytics.publish(
"Experimental features Updated".to_string(),
json!({
"score_details": score_details,
"vector_store": vector_store,
"metrics": metrics,
"logs_route": logs_route,
"export_puffin_reports": export_puffin_reports,
}),
Some(&req),
);
index_scheduler.put_runtime_features(new_features)?;
debug!(returns = ?new_features, "Patch features");
Ok(HttpResponse::Ok().json(new_features))
}

View File

@ -7,8 +7,7 @@ use bstr::ByteSlice as _;
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
use futures::StreamExt;
use index_scheduler::IndexScheduler;
use log::debug;
use index_scheduler::{IndexScheduler, TaskId};
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
@ -28,6 +27,7 @@ use serde_json::Value;
use tempfile::tempfile;
use tokio::fs::File;
use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};
use tracing::debug;
use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::error::MeilisearchHttpError;
@ -36,8 +36,11 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
};
use crate::search::parse_filter;
use crate::Opt;
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
@ -101,6 +104,7 @@ pub async fn get_document(
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = document_param.into_inner();
debug!(parameters = ?params, "Get document");
let index_uid = IndexUid::try_from(index_uid)?;
analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
@ -110,7 +114,7 @@ pub async fn get_document(
let index = index_scheduler.index(&index_uid)?;
let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?;
debug!("returns: {:?}", document);
debug!(returns = ?document, "Get document");
Ok(HttpResponse::Ok().json(document))
}
@ -118,6 +122,7 @@ pub async fn delete_document(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
path: web::Path<DocumentParam>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = path.into_inner();
@ -129,8 +134,12 @@ pub async fn delete_document(
index_uid: index_uid.to_string(),
documents_ids: vec![document_id],
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@ -168,9 +177,8 @@ pub async fn documents_by_query_post(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with body: {:?}", body);
let body = body.into_inner();
debug!(parameters = ?body, "Get documents POST");
analytics.post_fetch_documents(
&DocumentFetchKind::Normal {
@ -191,7 +199,7 @@ pub async fn get_documents(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params);
debug!(parameters = ?params, "Get documents GET");
let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
@ -235,7 +243,7 @@ fn documents_by_query(
let ret = PaginationView::new(offset, limit, total as usize, documents);
debug!("returns: {:?}", ret);
debug!(returns = ?ret, "Get documents");
Ok(HttpResponse::Ok().json(ret))
}
@ -267,16 +275,19 @@ pub async fn replace_documents(
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
body: Payload,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!("called with params: {:?}", params);
debug!(parameters = ?params, "Replace documents");
let params = params.into_inner();
analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
@ -285,9 +296,12 @@ pub async fn replace_documents(
params.csv_delimiter,
body,
IndexDocumentsMethod::ReplaceDocuments,
uid,
dry_run,
allow_index_creation,
)
.await?;
debug!(returns = ?task, "Replace documents");
Ok(HttpResponse::Accepted().json(task))
}
@ -298,16 +312,19 @@ pub async fn update_documents(
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
body: Payload,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!("called with params: {:?}", params);
let params = params.into_inner();
debug!(parameters = ?params, "Update documents");
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
@ -316,9 +333,12 @@ pub async fn update_documents(
params.csv_delimiter,
body,
IndexDocumentsMethod::UpdateDocuments,
uid,
dry_run,
allow_index_creation,
)
.await?;
debug!(returns = ?task, "Update documents");
Ok(HttpResponse::Accepted().json(task))
}
@ -332,6 +352,8 @@ async fn document_addition(
csv_delimiter: Option<u8>,
mut body: Payload,
method: IndexDocumentsMethod,
task_id: Option<TaskId>,
dry_run: bool,
allow_index_creation: bool,
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
let format = match (
@ -364,7 +386,7 @@ async fn document_addition(
}
};
let (uuid, mut update_file) = index_scheduler.create_update_file()?;
let (uuid, mut update_file) = index_scheduler.create_update_file(dry_run)?;
let temp_file = match tempfile() {
Ok(file) => file,
@ -403,11 +425,9 @@ async fn document_addition(
let read_file = buffer.into_inner().into_std().await;
let documents_count = tokio::task::spawn_blocking(move || {
let documents_count = match format {
PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
PayloadType::Csv { delimiter } => {
read_csv(&read_file, update_file.as_file_mut(), delimiter)?
}
PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
PayloadType::Json => read_json(&read_file, &mut update_file)?,
PayloadType::Csv { delimiter } => read_csv(&read_file, &mut update_file, delimiter)?,
PayloadType::Ndjson => read_ndjson(&read_file, &mut update_file)?,
};
// we NEED to persist the file here because we moved the `udpate_file` in another task.
update_file.persist()?;
@ -427,7 +447,10 @@ async fn document_addition(
Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e)))
if e.kind() == ErrorKind::NotFound => {}
Err(e) => {
log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}");
tracing::warn!(
index_uuid = %uuid,
"Unknown error happened while deleting a malformed update file: {e}"
);
}
}
// We still want to return the original error to the end user.
@ -445,7 +468,9 @@ async fn document_addition(
};
let scheduler = index_scheduler.clone();
let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id, dry_run))
.await?
{
Ok(task) => task,
Err(e) => {
index_scheduler.delete_update_file(uuid)?;
@ -453,7 +478,6 @@ async fn document_addition(
}
};
debug!("returns: {:?}", task);
Ok(task.into())
}
@ -462,9 +486,10 @@ pub async fn delete_documents_batch(
index_uid: web::Path<String>,
body: web::Json<Vec<Value>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body);
debug!(parameters = ?body, "Delete documents by batch");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
@ -476,10 +501,14 @@ pub async fn delete_documents_batch(
let task =
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!("returns: {:?}", task);
debug!(returns = ?task, "Delete documents by batch");
Ok(HttpResponse::Accepted().json(task))
}
@ -495,9 +524,10 @@ pub async fn delete_documents_by_filter(
index_uid: web::Path<String>,
body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body);
debug!(parameters = ?body, "Delete documents by filter");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
let filter = body.into_inner().filter;
@ -512,10 +542,14 @@ pub async fn delete_documents_by_filter(
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!("returns: {:?}", task);
debug!(returns = ?task, "Delete documents by filter");
Ok(HttpResponse::Accepted().json(task))
}
@ -523,16 +557,21 @@ pub async fn clear_all_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);
let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!("returns: {:?}", task);
debug!(returns = ?task, "Delete all documents");
Ok(HttpResponse::Accepted().json(task))
}

View File

@ -2,12 +2,12 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use serde_json::Value;
use tracing::debug;
use crate::analytics::{Analytics, FacetSearchAggregator};
use crate::extractors::authentication::policies::*;
@ -56,7 +56,7 @@ pub async fn search(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.into_inner();
debug!("facet search called with params: {:?}", query);
debug!(parameters = ?query, "Facet search");
let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
@ -83,7 +83,7 @@ pub async fn search(
let search_result = search_result?;
debug!("returns: {:?}", search_result);
debug!(returns = ?search_result, "Facet search");
Ok(HttpResponse::Ok().json(search_result))
}

View File

@ -5,7 +5,6 @@ use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
@ -16,12 +15,15 @@ use meilisearch_types::tasks::KindWithContent;
use serde::Serialize;
use serde_json::json;
use time::OffsetDateTime;
use tracing::debug;
use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::is_dry_run;
use crate::Opt;
pub mod documents;
pub mod facet_search;
@ -93,6 +95,7 @@ pub async fn list_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?paginate, "List indexes");
let filters = index_scheduler.filters();
let indexes: Vec<Option<IndexView>> =
index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
@ -105,7 +108,7 @@ pub async fn list_indexes(
let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());
debug!("returns: {:?}", ret);
debug!(returns = ?ret, "List indexes");
Ok(HttpResponse::Ok().json(ret))
}
@ -122,8 +125,10 @@ pub async fn create_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, Data<IndexScheduler>>,
body: AwebJson<IndexCreateRequest, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Create index");
let IndexCreateRequest { primary_key, uid } = body.into_inner();
let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
@ -135,8 +140,13 @@ pub async fn create_index(
);
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Create index");
Ok(HttpResponse::Accepted().json(task))
} else {
@ -177,7 +187,7 @@ pub async fn get_index(
let index = index_scheduler.index(&index_uid)?;
let index_view = IndexView::new(index_uid.into_inner(), &index)?;
debug!("returns: {:?}", index_view);
debug!(returns = ?index_view, "Get index");
Ok(HttpResponse::Ok().json(index_view))
}
@ -187,9 +197,10 @@ pub async fn update_index(
index_uid: web::Path<String>,
body: AwebJson<UpdateIndexRequest, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body);
debug!(parameters = ?body, "Update index");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let body = body.into_inner();
analytics.publish(
@ -203,21 +214,32 @@ pub async fn update_index(
primary_key: body.primary_key,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!("returns: {:?}", task);
debug!(returns = ?task, "Update index");
Ok(HttpResponse::Accepted().json(task))
}
pub async fn delete_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Delete index");
Ok(HttpResponse::Accepted().json(task))
}
@ -255,6 +277,6 @@ pub async fn get_index_stats(
let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?);
debug!("returns: {:?}", stats);
debug!(returns = ?stats, "Get index stats");
Ok(HttpResponse::Ok().json(stats))
}

View File

@ -2,7 +2,6 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler;
use log::{debug, warn};
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
@ -12,6 +11,7 @@ use meilisearch_types::milli;
use meilisearch_types::milli::vector::DistributionShift;
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::{debug, warn};
use crate::analytics::{Analytics, SearchAggregator};
use crate::extractors::authentication::policies::*;
@ -186,7 +186,7 @@ pub async fn search_with_url_query(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params);
debug!(parameters = ?params, "Search get");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query: SearchQuery = params.into_inner().into();
@ -213,7 +213,7 @@ pub async fn search_with_url_query(
let search_result = search_result?;
debug!("returns: {:?}", search_result);
debug!(returns = ?search_result, "Search get");
Ok(HttpResponse::Ok().json(search_result))
}
@ -227,7 +227,7 @@ pub async fn search_with_post(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query = params.into_inner();
debug!("search called with params: {:?}", query);
debug!(parameters = ?query, "Search post");
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
@ -252,7 +252,7 @@ pub async fn search_with_post(
let search_result = search_result?;
debug!("returns: {:?}", search_result);
debug!(returns = ?search_result, "Search post");
Ok(HttpResponse::Ok().json(search_result))
}

View File

@ -2,7 +2,6 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::facet_values_sort::FacetValuesSort;
@ -11,11 +10,13 @@ use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked};
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::SummarizedTaskView;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;
#[macro_export]
macro_rules! make_setting_route {
@ -24,17 +25,18 @@ macro_rules! make_setting_route {
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse, Resource};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, Settings};
use meilisearch_types::tasks::KindWithContent;
use tracing::debug;
use $crate::analytics::Analytics;
use $crate::extractors::authentication::policies::*;
use $crate::extractors::authentication::GuardedData;
use $crate::extractors::sequential_extractor::SeqHandler;
use $crate::routes::SummarizedTaskView;
use $crate::Opt;
use $crate::routes::{is_dry_run, get_task_id, SummarizedTaskView};
pub async fn delete(
index_scheduler: GuardedData<
@ -42,6 +44,8 @@ macro_rules! make_setting_route {
Data<IndexScheduler>,
>,
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@ -56,12 +60,14 @@ macro_rules! make_setting_route {
is_deletion: true,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!("returns: {:?}", task);
debug!(returns = ?task, "Delete settings");
Ok(HttpResponse::Accepted().json(task))
}
@ -73,11 +79,13 @@ macro_rules! make_setting_route {
index_uid: actix_web::web::Path<String>,
body: deserr::actix_web::AwebJson<Option<$type>, $err_ty>,
req: HttpRequest,
opt: web::Data<Opt>,
$analytics_var: web::Data<dyn Analytics>,
) -> std::result::Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let body = body.into_inner();
debug!(parameters = ?body, "Update settings");
#[allow(clippy::redundant_closure_call)]
$analytics(&body, &req);
@ -104,12 +112,14 @@ macro_rules! make_setting_route {
is_deletion: false,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!("returns: {:?}", task);
debug!(returns = ?task, "Update settings");
Ok(HttpResponse::Accepted().json(task))
}
@ -126,7 +136,7 @@ macro_rules! make_setting_route {
let rtxn = index.read_txn()?;
let settings = settings(&index, &rtxn)?;
debug!("returns: {:?}", settings);
debug!(returns = ?settings, "Update settings");
let mut json = serde_json::json!(&settings);
let val = json[$camelcase_attr].take();
@ -651,11 +661,13 @@ pub async fn update_all(
index_uid: web::Path<String>,
body: AwebJson<Settings<Unchecked>, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let new_settings = body.into_inner();
debug!(parameters = ?new_settings, "Update all settings");
let new_settings = validate_settings(new_settings, &index_scheduler)?;
analytics.publish(
@ -765,10 +777,14 @@ pub async fn update_all(
is_deletion: false,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!("returns: {:?}", task);
debug!(returns = ?task, "Update all settings");
Ok(HttpResponse::Accepted().json(task))
}
@ -781,13 +797,15 @@ pub async fn get_all(
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
let new_settings = settings(&index, &rtxn)?;
debug!("returns: {:?}", new_settings);
debug!(returns = ?new_settings, "Get all settings");
Ok(HttpResponse::Ok().json(new_settings))
}
pub async fn delete_all(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@ -801,10 +819,14 @@ pub async fn delete_all(
is_deletion: true,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!("returns: {:?}", task);
debug!(returns = ?task, "Delete all settings");
Ok(HttpResponse::Accepted().json(task))
}

View File

@ -0,0 +1,281 @@
use std::convert::Infallible;
use std::io::Write;
use std::ops::ControlFlow;
use std::pin::Pin;
use std::str::FromStr;
use std::sync::Arc;
use actix_web::web::{Bytes, Data};
use actix_web::{web, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use futures_util::Stream;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use tokio::sync::mpsc;
use tracing_subscriber::filter::Targets;
use tracing_subscriber::Layer;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::LogRouteHandle;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("stream")
.route(web::post().to(SeqHandler(get_logs)))
.route(web::delete().to(SeqHandler(cancel_logs))),
);
}
#[derive(Debug, Default, Clone, Copy, Deserr, PartialEq, Eq)]
#[deserr(rename_all = camelCase)]
pub enum LogMode {
#[default]
Human,
Profile,
}
/// Simple wrapper around the `Targets` from `tracing_subscriber` to implement `MergeWithError` on it.
#[derive(Clone, Debug)]
struct MyTargets(Targets);
/// Simple wrapper around the `ParseError` from `tracing_subscriber` to implement `MergeWithError` on it.
#[derive(Debug, thiserror::Error)]
enum MyParseError {
#[error(transparent)]
ParseError(#[from] tracing_subscriber::filter::ParseError),
#[error(
"Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`"
)]
Example,
}
impl FromStr for MyTargets {
type Err = MyParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
if s.is_empty() {
Err(MyParseError::Example)
} else {
Ok(MyTargets(Targets::from_str(s).map_err(MyParseError::ParseError)?))
}
}
}
impl MergeWithError<MyParseError> for DeserrJsonError<BadRequest> {
fn merge(
_self_: Option<Self>,
other: MyParseError,
merge_location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
Self::error::<Infallible>(
None,
ErrorKind::Unexpected { msg: other.to_string() },
merge_location,
)
}
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields, validate = validate_get_logs -> DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub struct GetLogs {
#[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError<BadRequest>)]
target: MyTargets,
#[deserr(default, error = DeserrJsonError<BadRequest>)]
mode: LogMode,
#[deserr(default = false, error = DeserrJsonError<BadRequest>)]
profile_memory: bool,
}
fn validate_get_logs<E: DeserializeError>(
logs: GetLogs,
location: ValuePointerRef,
) -> Result<GetLogs, E> {
if logs.profile_memory && logs.mode != LogMode::Profile {
Err(deserr::take_cf_content(E::error::<Infallible>(
None,
ErrorKind::Unexpected {
msg: format!("`profile_memory` can only be used while profiling code and is not compatible with the {:?} mode.", logs.mode),
},
location,
)))
} else {
Ok(logs)
}
}
struct LogWriter {
sender: mpsc::UnboundedSender<Vec<u8>>,
}
impl Write for LogWriter {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.sender.send(buf.to_vec()).map_err(std::io::Error::other)?;
Ok(buf.len())
}
fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}
struct HandleGuard {
/// We need to keep an handle on the logs to make it available again when the streamer is dropped
logs: Arc<LogRouteHandle>,
}
impl Drop for HandleGuard {
fn drop(&mut self) {
if let Err(e) = self.logs.modify(|layer| *layer.inner_mut() = None) {
tracing::error!("Could not free the logs route: {e}");
}
}
}
fn byte_stream(
receiver: mpsc::UnboundedReceiver<Vec<u8>>,
guard: HandleGuard,
) -> impl futures_util::Stream<Item = Result<Bytes, ResponseError>> {
futures_util::stream::unfold((receiver, guard), move |(mut receiver, guard)| async move {
let vec = receiver.recv().await;
vec.map(From::from).map(Ok).map(|a| (a, (receiver, guard)))
})
}
type PinnedByteStream = Pin<Box<dyn Stream<Item = Result<Bytes, ResponseError>>>>;
fn make_layer<
S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>,
>(
opt: &GetLogs,
logs: Data<LogRouteHandle>,
) -> (Box<dyn Layer<S> + Send + Sync>, PinnedByteStream) {
let guard = HandleGuard { logs: logs.into_inner() };
match opt.mode {
LogMode::Human => {
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
let fmt_layer = tracing_subscriber::fmt::layer()
.with_writer(move || LogWriter { sender: sender.clone() })
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE);
let stream = byte_stream(receiver, guard);
(Box::new(fmt_layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
}
LogMode::Profile => {
let (trace, layer) = tracing_trace::Trace::new(opt.profile_memory);
let stream = entry_stream(trace, guard);
(Box::new(layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
}
}
}
fn entry_stream(
trace: tracing_trace::Trace,
guard: HandleGuard,
) -> impl Stream<Item = Result<Bytes, ResponseError>> {
let receiver = trace.into_receiver();
let entry_buf = Vec::new();
futures_util::stream::unfold(
(receiver, entry_buf, guard),
move |(mut receiver, mut entry_buf, guard)| async move {
let mut bytes = Vec::new();
while bytes.len() < 8192 {
entry_buf.clear();
let Ok(count) = tokio::time::timeout(
std::time::Duration::from_secs(1),
receiver.recv_many(&mut entry_buf, 100),
)
.await
else {
break;
};
if count == 0 {
if !bytes.is_empty() {
break;
}
// channel closed, exit
return None;
}
for entry in &entry_buf {
if let Err(error) = serde_json::to_writer(&mut bytes, entry) {
tracing::error!(
error = &error as &dyn std::error::Error,
"deserializing entry"
);
return Some((
Err(ResponseError::from_msg(
format!("error deserializing entry: {error}"),
Code::Internal,
)),
(receiver, entry_buf, guard),
));
}
}
}
Some((Ok(bytes.into()), (receiver, entry_buf, guard)))
},
)
}
pub async fn get_logs(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogRouteHandle>,
body: AwebJson<GetLogs, DeserrJsonError>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
let opt = body.into_inner();
let mut stream = None;
logs.modify(|layer| match layer.inner_mut() {
None => {
// there is no one getting logs
*layer.filter_mut() = opt.target.0.clone();
let (new_layer, new_stream) = make_layer(&opt, logs.clone());
*layer.inner_mut() = Some(new_layer);
stream = Some(new_stream);
}
Some(_) => {
// there is already someone getting logs
}
})
.unwrap();
if let Some(stream) = stream {
Ok(HttpResponse::Ok().streaming(stream))
} else {
Err(MeilisearchHttpError::AlreadyUsedLogRoute.into())
}
}
pub async fn cancel_logs(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogRouteHandle>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
if let Err(e) = logs.modify(|layer| *layer.inner_mut() = None) {
tracing::error!("Could not free the logs route: {e}");
}
Ok(HttpResponse::NoContent().finish())
}

View File

@ -3,18 +3,19 @@ use std::collections::BTreeMap;
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
use serde::{Deserialize, Serialize};
use serde_json::json;
use time::OffsetDateTime;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::Opt;
const PAGINATION_DEFAULT_LIMIT: usize = 20;
@ -22,6 +23,7 @@ mod api_key;
mod dump;
pub mod features;
pub mod indexes;
mod logs;
mod metrics;
mod multi_search;
mod snapshot;
@ -31,6 +33,7 @@ pub mod tasks;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::scope("/tasks").configure(tasks::configure))
.service(web::resource("/health").route(web::get().to(get_health)))
.service(web::scope("/logs").configure(logs::configure))
.service(web::scope("/keys").configure(api_key::configure))
.service(web::scope("/dumps").configure(dump::configure))
.service(web::scope("/snapshots").configure(snapshot::configure))
@ -43,6 +46,56 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::scope("/experimental-features").configure(features::configure));
}
pub fn get_task_id(req: &HttpRequest, opt: &Opt) -> Result<Option<TaskId>, ResponseError> {
if !opt.experimental_replication_parameters {
return Ok(None);
}
let task_id = req
.headers()
.get("TaskId")
.map(|header| {
header.to_str().map_err(|e| {
ResponseError::from_msg(
format!("TaskId is not a valid utf-8 string: {e}"),
Code::BadRequest,
)
})
})
.transpose()?
.map(|s| {
s.parse::<TaskId>().map_err(|e| {
ResponseError::from_msg(
format!(
"Could not parse the TaskId as a {}: {e}",
std::any::type_name::<TaskId>(),
),
Code::BadRequest,
)
})
})
.transpose()?;
Ok(task_id)
}
pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
if !opt.experimental_replication_parameters {
return Ok(false);
}
Ok(req
.headers()
.get("DryRun")
.map(|header| {
header.to_str().map_err(|e| {
ResponseError::from_msg(
format!("DryRun is not a valid utf-8 string: {e}"),
Code::BadRequest,
)
})
})
.transpose()?
.map_or(false, |s| s.to_lowercase() == "true"))
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SummarizedTaskView {
@ -250,7 +303,7 @@ async fn get_stats(
let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;
debug!("returns: {:?}", stats);
debug!(returns = ?stats, "Get stats");
Ok(HttpResponse::Ok().json(stats))
}

View File

@ -3,11 +3,11 @@ use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::Serialize;
use tracing::debug;
use crate::analytics::{Analytics, MultiSearchAggregator};
use crate::extractors::authentication::policies::ActionPolicy;
@ -52,7 +52,7 @@ pub async fn multi_search_with_post(
for (query_index, (index_uid, mut query)) in
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
{
debug!("multi-search #{query_index}: called with params: {:?}", query);
debug!(on_index = query_index, parameters = ?query, "Multi-search");
// Check index from API key
if !index_scheduler.filters().is_index_authorized(&index_uid) {
@ -107,7 +107,7 @@ pub async fn multi_search_with_post(
err
})?;
debug!("returns: {:?}", search_results);
debug!(returns = ?search_results, "Multi-search");
Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
}

View File

@ -1,16 +1,17 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::SummarizedTaskView;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot))));
@ -19,14 +20,19 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
pub async fn create_snapshot(
index_scheduler: GuardedData<ActionPolicy<{ actions::SNAPSHOTS_CREATE }>, Data<IndexScheduler>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req));
let task = KindWithContent::SnapshotCreation;
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!("returns: {:?}", task);
debug!(returns = ?task, "Create snapshot");
Ok(HttpResponse::Accepted().json(task))
}

View File

@ -10,12 +10,13 @@ use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
use serde_json::json;
use super::SummarizedTaskView;
use super::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::Opt;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes))));
@ -32,6 +33,7 @@ pub async fn swap_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_SWAP }>, Data<IndexScheduler>>,
params: AwebJson<Vec<SwapIndexesPayload>, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let params = params.into_inner();
@ -60,7 +62,11 @@ pub async fn swap_indexes(
}
let task = KindWithContent::IndexSwap { swaps };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
Ok(HttpResponse::Accepted().json(task))
}

View File

@ -18,11 +18,12 @@ use time::macros::format_description;
use time::{Date, Duration, OffsetDateTime, Time};
use tokio::task;
use super::SummarizedTaskView;
use super::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::Opt;
const DEFAULT_LIMIT: u32 = 20;
@ -161,6 +162,7 @@ async fn cancel_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_CANCEL }>, Data<IndexScheduler>>,
params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let params = params.into_inner();
@ -197,7 +199,11 @@ async fn cancel_tasks(
let task_cancelation =
KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks };
let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation)).await??;
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task =
task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid, dry_run))
.await??;
let task: SummarizedTaskView = task.into();
Ok(HttpResponse::Ok().json(task))
@ -207,6 +213,7 @@ async fn delete_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_DELETE }>, Data<IndexScheduler>>,
params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let params = params.into_inner();
@ -242,7 +249,10 @@ async fn delete_tasks(
let task_deletion =
KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks };
let task = task::spawn_blocking(move || index_scheduler.register(task_deletion)).await??;
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid, dry_run))
.await??;
let task: SummarizedTaskView = task.into();
Ok(HttpResponse::Ok().json(task))

View File

@ -441,10 +441,6 @@ fn prepare_search<'t>(
ScoringStrategy::Skip
});
if query.show_ranking_score_details {
features.check_score_details()?;
}
if let Some(HybridQuery { embedder: Some(embedder), .. }) = &query.hybrid {
search.embedder_name(embedder);
}

View File

@ -59,6 +59,8 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
("POST", "/snapshots") => hashset!{"snapshots.create", "snapshots.*", "*"},
("GET", "/version") => hashset!{"version", "*"},
("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"},
("POST", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
("DELETE", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"},
("GET", "/keys/mykey/") => hashset!{"keys.get", "*"},
("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"},

View File

@ -100,16 +100,11 @@ impl Index<'_> {
pub async fn raw_add_documents(
&self,
payload: &str,
content_type: Option<&str>,
headers: Vec<(&str, &str)>,
query_parameter: &str,
) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter);
if let Some(content_type) = content_type {
self.service.post_str(url, payload, vec![("Content-Type", content_type)]).await
} else {
self.service.post_str(url, payload, Vec::new()).await
}
self.service.post_str(url, payload, headers).await
}
pub async fn update_documents(

View File

@ -13,6 +13,8 @@ use meilisearch::{analytics, create_app, setup_meilisearch};
use once_cell::sync::Lazy;
use tempfile::TempDir;
use tokio::time::sleep;
use tracing::level_filters::LevelFilter;
use tracing_subscriber::Layer;
use super::index::Index;
use super::service::Service;
@ -81,10 +83,16 @@ impl Server {
Response = ServiceResponse<impl MessageBody>,
Error = actix_web::Error,
> {
let (_route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
actix_web::test::init_service(create_app(
self.service.index_scheduler.clone().into(),
self.service.auth.clone().into(),
self.service.options.clone(),
route_layer_handle,
analytics::MockAnalytics::new(&self.service.options),
true,
))

View File

@ -7,6 +7,8 @@ use actix_web::test::TestRequest;
use index_scheduler::IndexScheduler;
use meilisearch::{analytics, create_app, Opt};
use meilisearch_auth::AuthController;
use tracing::level_filters::LevelFilter;
use tracing_subscriber::Layer;
use crate::common::encoder::Encoder;
use crate::common::Value;
@ -105,10 +107,16 @@ impl Service {
}
pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
let (_route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
let app = test::init_service(create_app(
self.index_scheduler.clone().into(),
self.auth.clone().into(),
self.options.clone(),
route_layer_handle,
analytics::MockAnalytics::new(&self.options),
true,
))

View File

@ -1,10 +1,11 @@
use actix_web::test;
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use crate::common::encoder::Encoder;
use crate::common::{GetAllDocumentsOptions, Server, Value};
use crate::common::{default_settings, GetAllDocumentsOptions, Server, Value};
use crate::json;
/// This is the basic usage of our API and every other tests uses the content-type application/json
@ -2157,3 +2158,49 @@ async fn batch_several_documents_addition() {
assert_eq!(code, 200, "failed with `{}`", response);
assert_eq!(response["results"].as_array().unwrap().len(), 120);
}
#[actix_rt::test]
async fn dry_register_file() {
let temp = tempfile::tempdir().unwrap();
let options =
Opt { experimental_replication_parameters: true, ..default_settings(temp.path()) };
let server = Server::new_with_options(options).await.unwrap();
let index = server.index("tamo");
let documents = r#"
{
"id": "12",
"doggo": "kefir"
}
"#;
let (response, code) = index
.raw_add_documents(
documents,
vec![("Content-Type", "application/json"), ("DryRun", "true")],
"",
)
.await;
snapshot!(response, @r###"
{
"taskUid": 0,
"indexUid": "tamo",
"status": "enqueued",
"type": "documentAdditionOrUpdate",
"enqueuedAt": "[date]"
}
"###);
snapshot!(code, @"202 Accepted");
let (response, code) = index.get_task(response.uid()).await;
snapshot!(response, @r###"
{
"message": "Task `0` not found.",
"code": "task_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#task_not_found"
}
"###);
snapshot!(code, @"404 Not Found");
}

View File

@ -209,7 +209,8 @@ async fn replace_documents_missing_payload() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", Some("application/json"), "").await;
let (response, code) =
index.raw_add_documents("", vec![("Content-Type", "application/json")], "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -220,7 +221,8 @@ async fn replace_documents_missing_payload() {
}
"###);
let (response, code) = index.raw_add_documents("", Some("application/x-ndjson"), "").await;
let (response, code) =
index.raw_add_documents("", vec![("Content-Type", "application/x-ndjson")], "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -231,7 +233,8 @@ async fn replace_documents_missing_payload() {
}
"###);
let (response, code) = index.raw_add_documents("", Some("text/csv"), "").await;
let (response, code) =
index.raw_add_documents("", vec![("Content-Type", "text/csv")], "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -287,7 +290,7 @@ async fn replace_documents_missing_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", None, "").await;
let (response, code) = index.raw_add_documents("", Vec::new(), "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
@ -299,7 +302,7 @@ async fn replace_documents_missing_content_type() {
"###);
// even with a csv delimiter specified this error is triggered first
let (response, code) = index.raw_add_documents("", None, "?csvDelimiter=;").await;
let (response, code) = index.raw_add_documents("", Vec::new(), "?csvDelimiter=;").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
@ -345,7 +348,7 @@ async fn replace_documents_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", Some("doggo"), "").await;
let (response, code) = index.raw_add_documents("", vec![("Content-Type", "doggo")], "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
@ -379,8 +382,9 @@ async fn replace_documents_bad_csv_delimiter() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter").await;
let (response, code) = index
.raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter")
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -391,8 +395,9 @@ async fn replace_documents_bad_csv_delimiter() {
}
"###);
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=doggo").await;
let (response, code) = index
.raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter=doggo")
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@ -404,7 +409,11 @@ async fn replace_documents_bad_csv_delimiter() {
"###);
let (response, code) = index
.raw_add_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰")))
.raw_add_documents(
"",
vec![("Content-Type", "application/json")],
&format!("?csvDelimiter={}", encode("🍰")),
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
@ -469,8 +478,9 @@ async fn replace_documents_csv_delimiter_with_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=a").await;
let (response, code) = index
.raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter=a")
.await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
@ -481,8 +491,9 @@ async fn replace_documents_csv_delimiter_with_bad_content_type() {
}
"###);
let (response, code) =
index.raw_add_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await;
let (response, code) = index
.raw_add_documents("", vec![("Content-Type", "application/x-ndjson")], "?csvDelimiter=a")
.await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{

View File

@ -1845,9 +1845,9 @@ async fn import_dump_v6_containing_experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);

View File

@ -18,9 +18,9 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -30,9 +30,9 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -42,9 +42,9 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -55,9 +55,9 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -68,9 +68,9 @@ async fn experimental_features() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -88,9 +88,9 @@ async fn experimental_feature_metrics() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": true,
"logsRoute": false,
"exportPuffinReports": false
}
"###);
@ -146,7 +146,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@ -2,9 +2,10 @@ use actix_web::http::header::ContentType;
use actix_web::test;
use http::header::ACCEPT_ENCODING;
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
use crate::common::encoder::Encoder;
use crate::common::{Server, Value};
use crate::common::{default_settings, Server, Value};
use crate::json;
#[actix_rt::test]
@ -199,3 +200,79 @@ async fn error_create_with_invalid_index_uid() {
}
"###);
}
#[actix_rt::test]
async fn send_task_id() {
let temp = tempfile::tempdir().unwrap();
let options =
Opt { experimental_replication_parameters: true, ..default_settings(temp.path()) };
let server = Server::new_with_options(options).await.unwrap();
let app = server.init_web_app().await;
let index = server.index("catto");
let (response, code) = index.create(None).await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "catto",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
let body = serde_json::to_string(&json!({
"uid": "doggo",
"primaryKey": None::<&str>,
}))
.unwrap();
let req = test::TestRequest::post()
.uri("/indexes")
.insert_header(("TaskId", "25"))
.insert_header(ContentType::json())
.set_payload(body)
.to_request();
let res = test::call_service(&app, req).await;
snapshot!(res.status(), @"202 Accepted");
let bytes = test::read_body(res).await;
let response = serde_json::from_slice::<Value>(&bytes).expect("Expecting valid json");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 25,
"indexUid": "doggo",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
let body = serde_json::to_string(&json!({
"uid": "girafo",
"primaryKey": None::<&str>,
}))
.unwrap();
let req = test::TestRequest::post()
.uri("/indexes")
.insert_header(("TaskId", "12"))
.insert_header(ContentType::json())
.set_payload(body)
.to_request();
let res = test::call_service(&app, req).await;
snapshot!(res.status(), @"400 Bad Request");
let bytes = test::read_body(res).await;
let response = serde_json::from_slice::<Value>(&bytes).expect("Expecting valid json");
snapshot!(json_string!(response), @r###"
{
"message": "Received bad task id: 12 should be >= to 26.",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}

View File

@ -5,6 +5,7 @@ mod documents;
mod dumps;
mod features;
mod index;
mod logs;
mod search;
mod settings;
mod snapshot;

View File

@ -0,0 +1,182 @@
use meili_snap::*;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn logs_stream_bad_target() {
let server = Server::new().await;
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "target": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.target`: expected a string, but found a boolean: `true`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "target": [] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.target`: expected a string, but found an array: `[]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Our help message
let (response, code) = server.service.post("/logs/stream", json!({ "target": "" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value at `.target`: Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// An error from the target parser
let (response, code) = server.service.post("/logs/stream", json!({ "target": "==" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value at `.target`: invalid filter directive: too many '=' in filter directive, expected 0 or 1",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn logs_stream_bad_mode() {
let server = Server::new().await;
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "mode": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.mode`: expected a string, but found a boolean: `true`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "mode": [] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.mode`: expected a string, but found an array: `[]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong value
let (response, code) = server.service.post("/logs/stream", json!({ "mode": "tamo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Unknown value `tamo` at `.mode`: expected one of `human`, `profile`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn logs_stream_bad_profile_memory() {
let server = Server::new().await;
// Wrong type
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": "tamo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found a string: `\"tamo\"`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": ["hello", "kefir"] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found an array: `[\"hello\",\"kefir\"]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Used with default parameters
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Human mode.",
"code": "invalid_settings_typo_tolerance",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance"
}
"###);
// Used with an unsupported mode
let (response, code) =
server.service.post("/logs/stream", json!({ "mode": "fmt", "profileMemory": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Unknown value `fmt` at `.mode`: expected one of `human`, `profile`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn logs_stream_without_enabling_the_route() {
let server = Server::new().await;
let (response, code) = server.service.post("/logs/stream", json!({})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
let (response, code) = server.service.delete("/logs/stream").await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
}

View File

@ -0,0 +1,92 @@
mod error;
use std::rc::Rc;
use std::str::FromStr;
use actix_web::http::header::ContentType;
use meili_snap::snapshot;
use meilisearch::{analytics, create_app, Opt};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::Layer;
use crate::common::{default_settings, Server};
use crate::json;
#[actix_web::test]
async fn basic_test_log_stream_route() {
let db_path = tempfile::tempdir().unwrap();
let server = Server::new_with_options(Opt {
experimental_enable_logs_route: true,
..default_settings(db_path.path())
})
.await
.unwrap();
let (route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
let subscriber = tracing_subscriber::registry().with(route_layer).with(
tracing_subscriber::fmt::layer()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE)
.with_filter(tracing_subscriber::filter::LevelFilter::from_str("INFO").unwrap()),
);
let app = actix_web::test::init_service(create_app(
server.service.index_scheduler.clone().into(),
server.service.auth.clone().into(),
server.service.options.clone(),
route_layer_handle,
analytics::MockAnalytics::new(&server.service.options),
true,
))
.await;
// set the subscriber as the default for the application
tracing::subscriber::set_global_default(subscriber).unwrap();
let app = Rc::new(app);
// First, we start listening on the `/logs/stream` route
let handle_app = app.clone();
let handle = tokio::task::spawn_local(async move {
let req = actix_web::test::TestRequest::post()
.uri("/logs/stream")
.insert_header(ContentType::json())
.set_payload(
serde_json::to_vec(&json!({
"mode": "human",
"target": "info",
}))
.unwrap(),
);
let req = req.to_request();
let ret = actix_web::test::call_service(&*handle_app, req).await;
actix_web::test::read_body(ret).await
});
// We're going to create an index to get at least one info log saying we processed a batch of task
let (ret, _code) = server.create_index(json!({ "uid": "tamo" })).await;
snapshot!(ret, @r###"
{
"taskUid": 0,
"indexUid": "tamo",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
server.wait_task(ret.uid()).await;
let req = actix_web::test::TestRequest::delete().uri("/logs/stream");
let req = req.to_request();
let ret = actix_web::test::call_service(&*app, req).await;
let code = ret.status();
snapshot!(code, @"204 No Content");
let logs = handle.await.unwrap();
let logs = String::from_utf8(logs.to_vec()).unwrap();
assert!(logs.contains("INFO"), "{logs}");
}

View File

@ -13,9 +13,9 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
}
"###);

View File

@ -766,38 +766,14 @@ async fn faceting_max_values_per_facet() {
}
#[actix_rt::test]
async fn experimental_feature_score_details() {
async fn test_score_details() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(json!(documents), None).await;
index.wait_task(0).await;
index
.search(
json!({
"q": "train dragon",
"showRankingScoreDetails": true,
}),
|response, code| {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Computing score details requires enabling the `score details` experimental feature. See https://github.com/meilisearch/product/discussions/674",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
},
)
.await;
let (response, code) = server.set_features(json!({"scoreDetails": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response["scoreDetails"], @"true");
let res = index.add_documents(json!(documents), None).await;
index.wait_task(res.0.uid()).await;
index
.search(

View File

@ -44,10 +44,6 @@ struct WebhookHandle {
}
async fn create_webhook_server() -> WebhookHandle {
let mut log_builder = env_logger::Builder::new();
log_builder.parse_filters("info");
log_builder.init();
let (sender, receiver) = mpsc::unbounded_channel();
let sender = Arc::new(sender);

View File

@ -71,7 +71,6 @@ itertools = "0.11.0"
puffin = "0.16.0"
# logging
log = "0.4.20"
logging_timer = "1.1.0"
csv = "1.3.0"
candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
@ -91,6 +90,7 @@ tiktoken-rs = "0.5.8"
liquid = "0.26.4"
arroy = "0.2.0"
rand = "0.8.5"
tracing = "0.1.40"
[dev-dependencies]
mimalloc = { version = "0.1.39", default-features = false }
@ -102,15 +102,7 @@ meili-snap = { path = "../meili-snap" }
rand = { version = "0.8.5", features = ["small_rng"] }
[features]
all-tokenizations = [
"charabia/chinese",
"charabia/hebrew",
"charabia/japanese",
"charabia/thai",
"charabia/korean",
"charabia/greek",
"charabia/khmer",
]
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
# Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml

View File

@ -25,6 +25,7 @@ impl<R: io::Read + io::Seek> DocumentsBatchReader<R> {
///
/// It first retrieves the index, then moves to the first document. Use the `into_cursor`
/// method to iterator over the documents, from the first to the last.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn from_reader(reader: R) -> Result<Self, Error> {
let reader = grenad::Reader::new(reader)?;
let mut cursor = reader.into_cursor()?;

View File

@ -227,6 +227,22 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
source_: crate::vector::settings::EmbedderSource,
embedder_name: String,
},
#[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its native dimensions of {expected_dimensions}. Found {dimensions}")]
InvalidOpenAiModelDimensions {
embedder_name: String,
model: &'static str,
dimensions: usize,
expected_dimensions: usize,
},
#[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its dimensions to a value higher than {max_dimensions}. Found {dimensions}")]
InvalidOpenAiModelDimensionsMax {
embedder_name: String,
model: &'static str,
dimensions: usize,
max_dimensions: usize,
},
#[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
InvalidSettingsDimensions { embedder_name: String },
}
impl From<crate::vector::Error> for Error {

View File

@ -6,9 +6,9 @@ use charabia::Normalize;
use fst::automaton::{Automaton, Str};
use fst::{IntoStreamer, Streamer};
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use log::error;
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
use tracing::error;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};

View File

@ -166,6 +166,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
continue;
}
let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id());
let entered = span.enter();
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
ctx,
logger,
@ -175,6 +178,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
back!();
continue;
};
drop(entered);
ranking_rule_scores.push(next_bucket.score);

View File

@ -14,6 +14,12 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
ClearDocuments { wtxn, index }
}
#[tracing::instrument(
level = "trace",
skip(self),
target = "indexing::documents",
name = "clear_documents"
)]
pub fn execute(self) -> Result<u64> {
puffin::profile_function!();

View File

@ -85,8 +85,8 @@ use charabia::normalizer::{Normalize, NormalizerOption};
use grenad::{CompressionType, SortAlgorithm};
use heed::types::{Bytes, DecodeIgnore, SerdeJson};
use heed::BytesEncode;
use log::debug;
use time::OffsetDateTime;
use tracing::debug;
use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk;

View File

@ -22,6 +22,7 @@ use crate::{FieldId, Index, Result};
/// # Panics
///
/// - if reader.is_empty(), this function may panic in some cases
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn enrich_documents_batch<R: Read + Seek>(
rtxn: &heed::RoTxn,
index: &Index,
@ -77,7 +78,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
},
[] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
[(field_id, name)] => {
log::info!("Primary key was not specified in index. Inferred to '{name}'");
tracing::info!("Primary key was not specified in index. Inferred to '{name}'");
PrimaryKey::Flat { name, field_id: *field_id }
}
multiple => {
@ -143,6 +144,8 @@ pub fn enrich_documents_batch<R: Read + Seek>(
/// Retrieve the document id after validating it, returning a `UserError`
/// if the id is invalid or can't be guessed.
#[tracing::instrument(level = "trace", skip(uuid_buffer, documents_batch_index, document)
target = "indexing::documents")]
fn fetch_or_generate_document_id(
document: &obkv::KvReader<FieldId>,
documents_batch_index: &DocumentsBatchIndex,

View File

@ -21,7 +21,7 @@ pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, R
///
/// Returns the generated internal documents ids and a grenad reader
/// with the list of extracted words from the given chunk of documents.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@ -16,7 +16,7 @@ use crate::Result;
///
/// Returns a grenad reader with the list of extracted facet numbers and
/// documents ids from the given chunk of docid facet number positions.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
fid_docid_facet_number: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@ -15,7 +15,7 @@ use crate::{FieldId, Result};
///
/// Returns a grenad reader with the list of extracted facet strings and
/// documents ids from the given chunk of docid facet string positions.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
docid_fid_facet_string: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@ -39,7 +39,7 @@ pub struct ExtractedFacetValues {
/// Returns the generated grenad reader containing the docid the fid and the orginal value as key
/// and the normalized value as value extracted from the given chunk of documents.
/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
@ -431,7 +431,7 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
if let Ok(float) = original.parse() {
output_numbers.push(float);
} else {
log::warn!(
tracing::warn!(
"Internal error, could not parse a geofield that has been validated. Please open an issue."
)
}

View File

@ -19,7 +19,7 @@ const MAX_COUNTED_WORDS: usize = 30;
///
/// Returns a grenad reader with the list of extracted field id word counts
/// and documents ids from the given chunk of docid word positions.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@ -13,7 +13,7 @@ use crate::{FieldId, InternalError, Result};
/// Extracts the geographical coordinates contained in each document under the `_geo` field.
///
/// Returns the generated grenad reader containing the docid as key associated to the (latitude, longitude)
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_geo_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@ -67,7 +67,7 @@ impl VectorStateDelta {
/// Extracts the embedding vector contained in each document under the `_vectors` field.
///
/// Returns the generated grenad reader containing the docid as key associated to the Vec<f32>
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_vector_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
@ -186,12 +186,12 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
prompt.render(obkv, DelAdd::Deletion, field_id_map).unwrap_or_default();
let new_prompt = prompt.render(obkv, DelAdd::Addition, field_id_map)?;
if old_prompt != new_prompt {
log::trace!(
tracing::trace!(
"🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
);
VectorStateDelta::NowGenerated(new_prompt)
} else {
log::trace!("⏭️ Prompt unmodified, skipping");
tracing::trace!("⏭️ Prompt unmodified, skipping");
VectorStateDelta::NoChange
}
} else {

View File

@ -23,7 +23,7 @@ use crate::{DocumentId, FieldId, Result};
///
/// The first returned reader is the one for normal word_docids, and the second one is for
/// exact_word_docids
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_word_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
@ -135,6 +135,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
))
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
fn words_into_sorter(
document_id: DocumentId,
fid: FieldId,

View File

@ -19,7 +19,7 @@ use crate::{DocumentId, Result};
///
/// Returns a grenad reader with the list of extracted word pairs proximities and
/// documents ids from the given chunk of docid word positions.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
@ -59,6 +59,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
if current_document_id.map_or(false, |id| id != document_id) {
puffin::profile_scope!("Document into sorter");
// FIXME: span inside of a hot loop might degrade performance and create big reports
let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter");
let _entered = span.enter();
document_word_positions_into_sorter(
current_document_id.unwrap(),
&del_word_pair_proximity,
@ -138,6 +142,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
if let Some(document_id) = current_document_id {
puffin::profile_scope!("Final document into sorter");
// FIXME: span inside of a hot loop might degrade performance and create big reports
let span = tracing::trace_span!(target: "indexing::details", "final_document_into_sorter");
let _entered = span.enter();
document_word_positions_into_sorter(
document_id,
&del_word_pair_proximity,
@ -147,6 +155,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
}
{
puffin::profile_scope!("sorter_into_reader");
// FIXME: span inside of a hot loop might degrade performance and create big reports
let span = tracing::trace_span!(target: "indexing::details", "sorter_into_reader");
let _entered = span.enter();
let mut writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,

View File

@ -18,7 +18,7 @@ use crate::{bucketed_position, DocumentId, Result};
///
/// Returns a grenad reader with the list of extracted words at positions and
/// documents ids from the given chunk of docid word positions.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_word_position_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
@ -94,6 +94,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
Ok(word_position_docids_reader)
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
fn words_position_into_sorter(
document_id: DocumentId,
key_buffer: &mut Vec<u8>,

View File

@ -14,8 +14,8 @@ use std::fs::File;
use std::io::BufReader;
use crossbeam_channel::Sender;
use log::debug;
use rayon::prelude::*;
use tracing::debug;
use self::extract_docid_word_positions::extract_docid_word_positions;
use self::extract_facet_number_docids::extract_facet_number_docids;
@ -41,6 +41,7 @@ use crate::{FieldId, FieldsIdsMap, Result};
/// Extract data for each databases from obkv documents in parallel.
/// Send data in grenad file over provided Sender.
#[allow(clippy::too_many_arguments)]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub(crate) fn data_from_obkv_documents(
original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
@ -113,7 +114,7 @@ pub(crate) fn data_from_obkv_documents(
{
let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || {
debug!("merge {} database", "facet-id-exists-docids");
debug!(database = "facet-id-exists-docids", "merge");
match facet_exists_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetExistsDocids(reader)));
@ -129,7 +130,7 @@ pub(crate) fn data_from_obkv_documents(
{
let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || {
debug!("merge {} database", "facet-id-is-null-docids");
debug!(database = "facet-id-is-null-docids", "merge");
match facet_is_null_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader)));
@ -145,7 +146,7 @@ pub(crate) fn data_from_obkv_documents(
{
let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || {
debug!("merge {} database", "facet-id-is-empty-docids");
debug!(database = "facet-id-is-empty-docids", "merge");
match facet_is_empty_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader)));
@ -257,13 +258,22 @@ fn spawn_extraction_task<FE, FS, M>(
M: MergeableReader + FromParallelIterator<M::Output> + Send + 'static,
M::Output: Send,
{
let current_span = tracing::Span::current();
rayon::spawn(move || {
puffin::profile_scope!("extract_multiple_chunks", name);
let child_span =
tracing::trace_span!(target: "", parent: &current_span, "extract_multiple_chunks");
let _entered = child_span.enter();
puffin::profile_scope!("extract_multiple_chunksdexing::details, ", name);
let chunks: Result<M> =
chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect();
let current_span = tracing::Span::current();
rayon::spawn(move || match chunks {
Ok(chunks) => {
debug!("merge {} database", name);
let child_span = tracing::trace_span!(target: "", parent: &current_span, "merge_multiple_chunks");
let _entered = child_span.enter();
debug!(database = name, "merge");
puffin::profile_scope!("merge_multiple_chunks", name);
let reader = chunks.merge(merge_fn, &indexer);
let _ = lmdb_writer_sx.send(reader.map(serialize_fn));

View File

@ -9,6 +9,10 @@ use super::{ClonableMmap, MergeFn};
use crate::update::index_documents::valid_lmdb_key;
use crate::Result;
/// This is something reasonable given the fact
/// that there is one grenad sorter by thread.
const MAX_GRENAD_SORTER_USAGE: usize = 500 * 1024 * 1024; // 500 MiB
pub type CursorClonableMmap = io::Cursor<ClonableMmap>;
pub fn create_writer<R: io::Write>(
@ -24,6 +28,9 @@ pub fn create_writer<R: io::Write>(
builder.build(BufWriter::new(file))
}
/// A helper function that creates a grenad sorter
/// with the given parameters. The max memory is
/// clamped to something reasonable.
pub fn create_sorter(
sort_algorithm: grenad::SortAlgorithm,
merge: MergeFn,
@ -41,7 +48,7 @@ pub fn create_sorter(
builder.max_nb_chunks(nb_chunks);
}
if let Some(memory) = max_memory {
builder.dump_threshold(memory);
builder.dump_threshold(memory.min(MAX_GRENAD_SORTER_USAGE));
builder.allow_realloc(false);
}
builder.sort_algorithm(sort_algorithm);
@ -49,6 +56,7 @@ pub fn create_sorter(
builder.build()
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")]
pub fn sorter_into_reader(
sorter: grenad::Sorter<MergeFn>,
indexer: GrenadParameters,
@ -187,10 +195,15 @@ impl Default for GrenadParameters {
impl GrenadParameters {
/// This function use the number of threads in the current threadpool to compute the value.
///
/// This should be called inside of a rayon thread pool,
/// Otherwise, it will take the global number of threads.
/// otherwise, it will take the global number of threads.
///
/// The max memory cannot exceed a given reasonable value.
pub fn max_memory_by_thread(&self) -> Option<usize> {
self.max_memory.map(|max_memory| max_memory / rayon::current_num_threads())
self.max_memory.map(|max_memory| {
(max_memory / rayon::current_num_threads()).min(MAX_GRENAD_SORTER_USAGE)
})
}
}
@ -240,6 +253,7 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
/// Write provided sorter in database using serialize_value function.
/// merge_values function is used if an entry already exist in the database.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")]
pub fn write_sorter_into_database<K, V, FS, FM>(
sorter: Sorter<MergeFn>,
database: &heed::Database<K, V>,

View File

@ -13,11 +13,11 @@ use std::result::Result as StdResult;
use crossbeam_channel::{Receiver, Sender};
use heed::types::Str;
use heed::Database;
use log::debug;
use rand::SeedableRng;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use slice_group_by::GroupBy;
use tracing::debug_span;
use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
use self::enrich::enrich_documents_batch;
@ -134,6 +134,7 @@ where
/// return an error and not the `IndexDocuments` struct as it is invalid to use it afterward.
///
/// Returns the number of documents added to the builder.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn add_documents<R: Read + Seek>(
mut self,
reader: DocumentsBatchReader<R>,
@ -179,6 +180,7 @@ where
/// Remove a batch of documents from the current builder.
///
/// Returns the number of documents deleted from the builder.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn remove_documents(
mut self,
to_delete: Vec<String>,
@ -214,6 +216,7 @@ where
/// - No batching using the standards `remove_documents` and `add_documents` took place
///
/// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::details")]
pub fn remove_documents_from_db_no_batch(
mut self,
to_delete: &RoaringBitmap,
@ -237,7 +240,12 @@ where
Ok((self, deleted_documents))
}
#[logging_timer::time("IndexDocuments::{}")]
#[tracing::instrument(
level = "trace"
skip_all,
target = "indexing::documents",
name = "index_documents"
)]
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
puffin::profile_function!();
@ -273,7 +281,12 @@ where
}
/// Returns the total number of documents in the index after the update.
#[logging_timer::time("IndexDocuments::{}")]
#[tracing::instrument(
level = "trace",
skip_all,
target = "profile::indexing::details",
name = "index_documents_raw"
)]
pub fn execute_raw(self, output: TransformOutput) -> Result<u64>
where
FP: Fn(UpdateIndexingStep) + Sync,
@ -374,8 +387,12 @@ where
let cloned_embedder = self.embedders.clone();
let current_span = tracing::Span::current();
// Run extraction pipeline in parallel.
pool.install(|| {
let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "extract_and_send_grenad_chunks");
let _enter = child_span.enter();
puffin::profile_scope!("extract_and_send_grenad_chunks");
// split obkv file into several chunks
let original_chunk_iter =
@ -489,10 +506,7 @@ where
documents_seen: documents_seen_count as usize,
total_documents: documents_count,
});
debug!(
"We have seen {} documents on {} total document so far",
documents_seen_count, documents_count
);
debug_span!("Seen", documents = documents_seen_count, total = documents_count);
}
if is_merged_database {
databases_seen += 1;
@ -543,7 +557,12 @@ where
Ok(number_of_documents)
}
#[logging_timer::time("IndexDocuments::{}")]
#[tracing::instrument(
level = "trace",
skip_all,
target = "indexing::prefix",
name = "index_documents_prefix_databases"
)]
pub fn execute_prefix_databases(
self,
word_docids: Option<grenad::Reader<CursorClonableMmap>>,
@ -598,6 +617,8 @@ where
let del_prefix_fst_words;
{
let span = tracing::trace_span!(target: "indexing::details", "compute_prefix_diffs");
let _entered = span.enter();
puffin::profile_scope!("compute_prefix_diffs");
current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
@ -722,6 +743,12 @@ where
/// Run the word prefix docids update operation.
#[allow(clippy::too_many_arguments)]
#[tracing::instrument(
level = "trace",
skip_all,
target = "indexing::prefix",
name = "index_documents_word_prefix_docids"
)]
fn execute_word_prefix_docids(
txn: &mut heed::RwTxn,
reader: grenad::Reader<Cursor<ClonableMmap>>,

View File

@ -146,7 +146,7 @@ impl<'a, 'i> Transform<'a, 'i> {
})
}
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn read_documents<R, FP, FA>(
&mut self,
reader: EnrichedDocumentsBatchReader<R>,
@ -359,7 +359,7 @@ impl<'a, 'i> Transform<'a, 'i> {
/// - If the document to remove was inserted by the `read_documents` method before but was NOT present in the db,
/// it's added into the grenad to ensure we don't insert it + removed from the list of new documents ids.
/// - If the document to remove was not present in either the db or the transform we do nothing.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn remove_documents<FA>(
&mut self,
mut to_remove: Vec<String>,
@ -450,7 +450,7 @@ impl<'a, 'i> Transform<'a, 'i> {
/// - No batching using the standards `remove_documents` and `add_documents` took place
///
/// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::details")]
pub fn remove_documents_from_db_no_batch<FA>(
&mut self,
to_remove: &RoaringBitmap,
@ -541,6 +541,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// Flatten a document from the fields ids map contained in self and insert the new
// created fields. Returns `None` if the document doesn't need to be flattened.
#[tracing::instrument(level = "trace", skip(self, obkv), target = "indexing::transform")]
fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Option<Vec<u8>>> {
if obkv
.iter()
@ -661,7 +662,7 @@ impl<'a, 'i> Transform<'a, 'i> {
/// Generate the `TransformOutput` based on the given sorter that can be generated from any
/// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document
/// id for the user side and the value must be an obkv where keys are valid fields ids.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::transform")]
pub(crate) fn output_from_sorter<F>(
self,
wtxn: &mut heed::RwTxn,

View File

@ -115,6 +115,7 @@ impl TypedChunk {
/// Write typed chunk in the corresponding LMDB database of the provided index.
/// Return new documents seen.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
pub(crate) fn write_typed_chunk_into_index(
typed_chunk: TypedChunk,
index: &Index,
@ -126,6 +127,8 @@ pub(crate) fn write_typed_chunk_into_index(
let mut is_merged_database = false;
match typed_chunk {
TypedChunk::Documents(obkv_documents_iter) => {
let span = tracing::trace_span!(target: "indexing::write_db", "documents");
let _entered = span.enter();
let mut operations: Vec<DocumentOperation> = Default::default();
let mut docids = index.documents_ids(wtxn)?;
@ -172,6 +175,9 @@ pub(crate) fn write_typed_chunk_into_index(
index.put_documents_ids(wtxn, &docids)?;
}
TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_word_count_docids");
let _entered = span.enter();
append_entries_into_database(
fid_word_count_docids_iter,
&index.field_id_word_count_docids,
@ -187,6 +193,8 @@ pub(crate) fn write_typed_chunk_into_index(
exact_word_docids_reader,
word_fid_docids_reader,
} => {
let span = tracing::trace_span!(target: "indexing::write_db", "word_docids");
let _entered = span.enter();
let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_reader) }?;
append_entries_into_database(
word_docids_iter.clone(),
@ -230,6 +238,8 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true;
}
TypedChunk::WordPositionDocids(word_position_docids_iter) => {
let span = tracing::trace_span!(target: "indexing::write_db", "word_position_docids");
let _entered = span.enter();
append_entries_into_database(
word_position_docids_iter,
&index.word_position_docids,
@ -241,16 +251,25 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true;
}
TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => {
let span =
tracing::trace_span!(target: "indexing::write_db","field_id_facet_number_docids");
let _entered = span.enter();
let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter);
indexer.execute(wtxn)?;
is_merged_database = true;
}
TypedChunk::FieldIdFacetStringDocids(facet_id_string_docids_iter) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_string_docids");
let _entered = span.enter();
let indexer = FacetsUpdate::new(index, FacetType::String, facet_id_string_docids_iter);
indexer.execute(wtxn)?;
is_merged_database = true;
}
TypedChunk::FieldIdFacetExistsDocids(facet_id_exists_docids) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_exists_docids");
let _entered = span.enter();
append_entries_into_database(
facet_id_exists_docids,
&index.facet_id_exists_docids,
@ -262,6 +281,9 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true;
}
TypedChunk::FieldIdFacetIsNullDocids(facet_id_is_null_docids) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_null_docids");
let _entered = span.enter();
append_entries_into_database(
facet_id_is_null_docids,
&index.facet_id_is_null_docids,
@ -273,6 +295,8 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true;
}
TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => {
let span = tracing::trace_span!(target: "profile::indexing::write_db", "field_id_facet_is_empty_docids");
let _entered = span.enter();
append_entries_into_database(
facet_id_is_empty_docids,
&index.facet_id_is_empty_docids,
@ -284,6 +308,9 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true;
}
TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "word_pair_proximity_docids");
let _entered = span.enter();
append_entries_into_database(
word_pair_proximity_docids_iter,
&index.word_pair_proximity_docids,
@ -295,6 +322,9 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true;
}
TypedChunk::FieldIdDocidFacetNumbers(fid_docid_facet_number) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_numbers");
let _entered = span.enter();
let index_fid_docid_facet_numbers =
index.field_id_docid_facet_f64s.remap_types::<Bytes, Bytes>();
let mut cursor = fid_docid_facet_number.into_cursor()?;
@ -315,6 +345,9 @@ pub(crate) fn write_typed_chunk_into_index(
}
}
TypedChunk::FieldIdDocidFacetStrings(fid_docid_facet_string) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_strings");
let _entered = span.enter();
let index_fid_docid_facet_strings =
index.field_id_docid_facet_strings.remap_types::<Bytes, Bytes>();
let mut cursor = fid_docid_facet_string.into_cursor()?;
@ -335,6 +368,8 @@ pub(crate) fn write_typed_chunk_into_index(
}
}
TypedChunk::GeoPoints(geo_points) => {
let span = tracing::trace_span!(target: "indexing::write_db", "geo_points");
let _entered = span.enter();
let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
let mut geo_faceted_docids = index.geo_faceted_documents_ids(wtxn)?;
@ -365,6 +400,8 @@ pub(crate) fn write_typed_chunk_into_index(
expected_dimension,
embedder_name,
} => {
let span = tracing::trace_span!(target: "indexing::write_db", "vector_points");
let _entered = span.enter();
let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
)?;
@ -480,9 +517,11 @@ pub(crate) fn write_typed_chunk_into_index(
}
}
log::debug!("Finished vector chunk for {}", embedder_name);
tracing::debug!("Finished vector chunk for {}", embedder_name);
}
TypedChunk::ScriptLanguageDocids(sl_map) => {
let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids");
let _entered = span.enter();
for (key, (deletion, addition)) in sl_map {
let mut db_key_exists = false;
let final_value = match index.script_language_docids.get(wtxn, &key)? {
@ -536,6 +575,7 @@ fn merge_word_docids_reader_into_fst(
/// Write provided entries in database using serialize_value function.
/// merge_values function is used if an entry already exist in the database.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
fn write_entries_into_database<R, K, V, FS, FM>(
data: grenad::Reader<R>,
database: &heed::Database<K, V>,
@ -582,6 +622,7 @@ where
/// merge_values function is used if an entry already exist in the database.
/// All provided entries must be ordered.
/// If the index is not empty, write_entries_into_database is called instead.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
fn append_entries_into_database<R, K, V, FS, FM>(
data: grenad::Reader<R>,
database: &heed::Database<K, V>,

View File

@ -372,6 +372,11 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.embedder_settings = Setting::Reset;
}
#[tracing::instrument(
level = "trace"
skip(self, progress_callback, should_abort, old_fields_ids_map),
target = "indexing::documents"
)]
fn reindex<FP, FA>(
&mut self,
progress_callback: &FP,
@ -974,6 +979,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
crate::vector::settings::EmbeddingSettings::apply_default_source(
&mut setting,
);
crate::vector::settings::EmbeddingSettings::apply_default_openai_model(
&mut setting,
);
let setting = validate_embedding_settings(setting, &name)?;
changed = true;
new_configs.insert(name, setting);
@ -1119,6 +1127,14 @@ pub fn validate_embedding_settings(
let Setting::Set(settings) = settings else { return Ok(settings) };
let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } =
settings;
if let Some(0) = dimensions.set() {
return Err(crate::error::UserError::InvalidSettingsDimensions {
embedder_name: name.to_owned(),
}
.into());
}
let Some(inferred_source) = source.set() else {
return Ok(Setting::Set(EmbeddingSettings {
source,
@ -1132,14 +1148,34 @@ pub fn validate_embedding_settings(
match inferred_source {
EmbedderSource::OpenAi => {
check_unset(&revision, "revision", inferred_source, name)?;
check_unset(&dimensions, "dimensions", inferred_source, name)?;
if let Setting::Set(model) = &model {
crate::vector::openai::EmbeddingModel::from_name(model.as_str()).ok_or(
crate::error::UserError::InvalidOpenAiModel {
let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str())
.ok_or(crate::error::UserError::InvalidOpenAiModel {
embedder_name: name.to_owned(),
model: model.clone(),
},
)?;
})?;
if let Setting::Set(dimensions) = dimensions {
if !model.supports_overriding_dimensions()
&& dimensions != model.default_dimensions()
{
return Err(crate::error::UserError::InvalidOpenAiModelDimensions {
embedder_name: name.to_owned(),
model: model.name(),
dimensions,
expected_dimensions: model.default_dimensions(),
}
.into());
}
if dimensions > model.default_dimensions() {
return Err(crate::error::UserError::InvalidOpenAiModelDimensionsMax {
embedder_name: name.to_owned(),
model: model.name(),
dimensions,
max_dimensions: model.default_dimensions(),
}
.into());
}
}
}
}
EmbedderSource::HuggingFace => {

View File

@ -39,7 +39,12 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
}
}
#[logging_timer::time("WordPrefixDocids::{}")]
#[tracing::instrument(
level = "trace",
skip_all,
target = "indexing::prefix",
name = "word_prefix_docids"
)]
pub fn execute(
self,
mut new_word_docids_iter: grenad::ReaderCursor<CursorClonableMmap>,

View File

@ -4,7 +4,7 @@ use std::str;
use grenad::CompressionType;
use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Database};
use log::debug;
use tracing::debug;
use crate::error::SerializationError;
use crate::heed_codec::StrBEU16Codec;
@ -44,7 +44,12 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
}
}
#[logging_timer::time("WordPrefixIntegerDocids::{}")]
#[tracing::instrument(
level = "trace",
skip_all,
target = "indexing::prefix",
name = "words_prefix_integer_docids"
)]
pub fn execute(
self,
new_word_integer_docids: grenad::Reader<CursorClonableMmap>,

View File

@ -38,7 +38,12 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
self
}
#[logging_timer::time("WordsPrefixesFst::{}")]
#[tracing::instrument(
level = "trace",
skip_all,
target = "indexing::prefix",
name = "words_prefix_fst"
)]
pub fn execute(self) -> Result<()> {
puffin::profile_function!();

View File

@ -73,7 +73,7 @@ impl Embedder {
let device = match candle_core::Device::cuda_if_available(0) {
Ok(device) => device,
Err(error) => {
log::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error);
tracing::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error);
candle_core::Device::Cpu
}
};

View File

@ -17,6 +17,7 @@ pub struct Embedder {
pub struct EmbedderOptions {
pub api_key: Option<String>,
pub embedding_model: EmbeddingModel,
pub dimensions: Option<usize>,
}
#[derive(
@ -41,34 +42,50 @@ pub enum EmbeddingModel {
#[serde(rename = "text-embedding-ada-002")]
#[deserr(rename = "text-embedding-ada-002")]
TextEmbeddingAda002,
#[serde(rename = "text-embedding-3-small")]
#[deserr(rename = "text-embedding-3-small")]
TextEmbedding3Small,
#[serde(rename = "text-embedding-3-large")]
#[deserr(rename = "text-embedding-3-large")]
TextEmbedding3Large,
}
impl EmbeddingModel {
pub fn supported_models() -> &'static [&'static str] {
&["text-embedding-ada-002"]
&["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]
}
pub fn max_token(&self) -> usize {
match self {
EmbeddingModel::TextEmbeddingAda002 => 8191,
EmbeddingModel::TextEmbedding3Large => 8191,
EmbeddingModel::TextEmbedding3Small => 8191,
}
}
pub fn dimensions(&self) -> usize {
pub fn default_dimensions(&self) -> usize {
match self {
EmbeddingModel::TextEmbeddingAda002 => 1536,
EmbeddingModel::TextEmbedding3Large => 3072,
EmbeddingModel::TextEmbedding3Small => 1536,
}
}
pub fn name(&self) -> &'static str {
match self {
EmbeddingModel::TextEmbeddingAda002 => "text-embedding-ada-002",
EmbeddingModel::TextEmbedding3Large => "text-embedding-3-large",
EmbeddingModel::TextEmbedding3Small => "text-embedding-3-small",
}
}
pub fn from_name(name: &str) -> Option<Self> {
match name {
"text-embedding-ada-002" => Some(EmbeddingModel::TextEmbeddingAda002),
"text-embedding-3-large" => Some(EmbeddingModel::TextEmbedding3Large),
"text-embedding-3-small" => Some(EmbeddingModel::TextEmbedding3Small),
_ => None,
}
}
@ -78,6 +95,20 @@ impl EmbeddingModel {
EmbeddingModel::TextEmbeddingAda002 => {
Some(DistributionShift { current_mean: 0.90, current_sigma: 0.08 })
}
EmbeddingModel::TextEmbedding3Large => {
Some(DistributionShift { current_mean: 0.70, current_sigma: 0.1 })
}
EmbeddingModel::TextEmbedding3Small => {
Some(DistributionShift { current_mean: 0.75, current_sigma: 0.1 })
}
}
}
pub fn supports_overriding_dimensions(&self) -> bool {
match self {
EmbeddingModel::TextEmbeddingAda002 => false,
EmbeddingModel::TextEmbedding3Large => true,
EmbeddingModel::TextEmbedding3Small => true,
}
}
}
@ -86,11 +117,11 @@ pub const OPENAI_EMBEDDINGS_URL: &str = "https://api.openai.com/v1/embeddings";
impl EmbedderOptions {
pub fn with_default_model(api_key: Option<String>) -> Self {
Self { api_key, embedding_model: Default::default() }
Self { api_key, embedding_model: Default::default(), dimensions: None }
}
pub fn with_embedding_model(api_key: Option<String>, embedding_model: EmbeddingModel) -> Self {
Self { api_key, embedding_model }
Self { api_key, embedding_model, dimensions: None }
}
}
@ -142,12 +173,16 @@ impl Embedder {
let retry_duration = match result {
Ok(embeddings) => return Ok(embeddings),
Err(retry) => {
log::warn!("Failed: {}", retry.error);
tracing::warn!("Failed: {}", retry.error);
tokenized |= retry.must_tokenize();
retry.into_duration(attempt)
}
}?;
log::warn!("Attempt #{}, retrying after {}ms.", attempt, retry_duration.as_millis());
tracing::warn!(
"Attempt #{}, retrying after {}ms.",
attempt,
retry_duration.as_millis()
);
tokio::time::sleep(retry_duration).await;
}
@ -213,7 +248,7 @@ impl Embedder {
.map_err(EmbedError::openai_unexpected)
.map_err(Retry::retry_later)?;
log::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
tracing::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens(
error_response.error,
@ -235,9 +270,13 @@ impl Embedder {
client: &reqwest::Client,
) -> Result<Vec<Embeddings<f32>>, Retry> {
for text in texts {
log::trace!("Received prompt: {}", text.as_ref())
tracing::trace!("Received prompt: {}", text.as_ref())
}
let request = OpenAiRequest { model: self.options.embedding_model.name(), input: texts };
let request = OpenAiRequest {
model: self.options.embedding_model.name(),
input: texts,
dimensions: self.overriden_dimensions(),
};
let response = client
.post(OPENAI_EMBEDDINGS_URL)
.json(&request)
@ -254,7 +293,7 @@ impl Embedder {
.map_err(EmbedError::openai_unexpected)
.map_err(Retry::retry_later)?;
log::trace!("response: {:?}", response.data);
tracing::trace!("response: {:?}", response.data);
Ok(response
.data
@ -280,8 +319,7 @@ impl Embedder {
}
let mut tokens = encoded.as_slice();
let mut embeddings_for_prompt =
Embeddings::new(self.options.embedding_model.dimensions());
let mut embeddings_for_prompt = Embeddings::new(self.dimensions());
while tokens.len() > max_token_count {
let window = &tokens[..max_token_count];
embeddings_for_prompt.push(self.embed_tokens(window, client).await?).unwrap();
@ -322,8 +360,11 @@ impl Embedder {
tokens: &[usize],
client: &reqwest::Client,
) -> Result<Embedding, Retry> {
let request =
OpenAiTokensRequest { model: self.options.embedding_model.name(), input: tokens };
let request = OpenAiTokensRequest {
model: self.options.embedding_model.name(),
input: tokens,
dimensions: self.overriden_dimensions(),
};
let response = client
.post(OPENAI_EMBEDDINGS_URL)
.json(&request)
@ -366,12 +407,24 @@ impl Embedder {
}
pub fn dimensions(&self) -> usize {
self.options.embedding_model.dimensions()
if self.options.embedding_model.supports_overriding_dimensions() {
self.options.dimensions.unwrap_or(self.options.embedding_model.default_dimensions())
} else {
self.options.embedding_model.default_dimensions()
}
}
pub fn distribution(&self) -> Option<DistributionShift> {
self.options.embedding_model.distribution()
}
fn overriden_dimensions(&self) -> Option<usize> {
if self.options.embedding_model.supports_overriding_dimensions() {
self.options.dimensions
} else {
None
}
}
}
// retrying in case of failure
@ -431,12 +484,16 @@ impl Retry {
struct OpenAiRequest<'a, S: AsRef<str> + serde::Serialize> {
model: &'a str,
input: &'a [S],
#[serde(skip_serializing_if = "Option::is_none")]
dimensions: Option<usize>,
}
#[derive(Debug, Serialize)]
struct OpenAiTokensRequest<'a> {
model: &'a str,
input: &'a [usize],
#[serde(skip_serializing_if = "Option::is_none")]
dimensions: Option<usize>,
}
#[derive(Debug, Deserialize)]

View File

@ -1,6 +1,7 @@
use deserr::Deserr;
use serde::{Deserialize, Serialize};
use super::openai;
use crate::prompt::PromptData;
use crate::update::Setting;
use crate::vector::EmbeddingConfig;
@ -82,7 +83,7 @@ impl EmbeddingSettings {
Self::MODEL => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
Self::REVISION => &[EmbedderSource::HuggingFace],
Self::API_KEY => &[EmbedderSource::OpenAi],
Self::DIMENSIONS => &[EmbedderSource::UserProvided],
Self::DIMENSIONS => &[EmbedderSource::OpenAi, EmbedderSource::UserProvided],
Self::DOCUMENT_TEMPLATE => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
_other => unreachable!("unknown field"),
}
@ -90,9 +91,13 @@ impl EmbeddingSettings {
pub fn allowed_fields_for_source(source: EmbedderSource) -> &'static [&'static str] {
match source {
EmbedderSource::OpenAi => {
&[Self::SOURCE, Self::MODEL, Self::API_KEY, Self::DOCUMENT_TEMPLATE]
}
EmbedderSource::OpenAi => &[
Self::SOURCE,
Self::MODEL,
Self::API_KEY,
Self::DOCUMENT_TEMPLATE,
Self::DIMENSIONS,
],
EmbedderSource::HuggingFace => {
&[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE]
}
@ -109,6 +114,17 @@ impl EmbeddingSettings {
*source = Setting::Set(EmbedderSource::default())
}
}
pub(crate) fn apply_default_openai_model(setting: &mut Setting<EmbeddingSettings>) {
if let Setting::Set(EmbeddingSettings {
source: Setting::Set(EmbedderSource::OpenAi),
model: model @ (Setting::NotSet | Setting::Reset),
..
}) = setting
{
*model = Setting::Set(openai::EmbeddingModel::default().name().to_owned())
}
}
}
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
@ -176,7 +192,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
model: Setting::Set(options.embedding_model.name().to_owned()),
revision: Setting::NotSet,
api_key: options.api_key.map(Setting::Set).unwrap_or_default(),
dimensions: Setting::NotSet,
dimensions: options.dimensions.map(Setting::Set).unwrap_or_default(),
document_template: Setting::Set(prompt.template),
},
super::EmbedderOptions::UserProvided(options) => Self {
@ -208,6 +224,9 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
if let Some(api_key) = api_key.set() {
options.api_key = Some(api_key);
}
if let Some(dimensions) = dimensions.set() {
options.dimensions = Some(dimensions);
}
this.embedder_options = super::EmbedderOptions::OpenAi(options);
}
EmbedderSource::HuggingFace => {

23
tracing-trace/Cargo.toml Normal file
View File

@ -0,0 +1,23 @@
[package]
name = "tracing-trace"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
color-spantrace = "0.2.1"
fxprof-processed-profile = "0.6.0"
serde = { version = "1.0.195", features = ["derive"] }
serde_json = "1.0.111"
tracing = "0.1.40"
tracing-error = "0.2.0"
tracing-subscriber = "0.3.18"
byte-unit = { version = "4.0.19", default-features = false, features = [
"std",
"serde",
] }
tokio = { version = "1.35.1", features = ["sync"] }
[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies]
libproc = "0.14.2"

View File

@ -0,0 +1,21 @@
use std::ffi::OsString;
use std::io::Write;
use serde_json::json;
fn main() {
let input_file = std::env::args_os().nth(1).expect("missing <INPUT> file");
let input =
std::io::BufReader::new(std::fs::File::open(&input_file).expect("could not open <INPUT>"));
let trace = tracing_trace::TraceReader::new(input);
let profile = tracing_trace::processor::span_stats::to_call_stats(trace).unwrap();
let mut output_file = OsString::new();
output_file.push("callstats-");
output_file.push(input_file);
let mut output_file = std::io::BufWriter::new(std::fs::File::create(output_file).unwrap());
for (key, value) in profile {
serde_json::to_writer(&mut output_file, &json!({key: value})).unwrap();
writeln!(&mut output_file).unwrap();
}
output_file.flush().unwrap();
}

View File

@ -0,0 +1,18 @@
use std::ffi::OsString;
use std::io::Write;
fn main() {
let input_file = std::env::args_os().nth(1).expect("missing <INPUT> file");
let input =
std::io::BufReader::new(std::fs::File::open(&input_file).expect("could not open <INPUT>"));
let trace = tracing_trace::TraceReader::new(input);
let profile =
tracing_trace::processor::firefox_profiler::to_firefox_profile(trace, "Meilisearch")
.unwrap();
let mut output_file = OsString::new();
output_file.push("firefox-");
output_file.push(input_file);
let mut output_file = std::io::BufWriter::new(std::fs::File::create(output_file).unwrap());
serde_json::to_writer(&mut output_file, &profile).unwrap();
output_file.flush().unwrap();
}

141
tracing-trace/src/entry.rs Normal file
View File

@ -0,0 +1,141 @@
use std::borrow::Cow;
use serde::{Deserialize, Serialize};
use tracing::span::Id as TracingId;
#[derive(Debug, Serialize, Deserialize)]
pub enum Entry {
/// A code location was accessed for the first time
NewCallsite(NewCallsite),
/// A new thread was accessed
NewThread(NewThread),
/// A new call started
NewSpan(NewSpan),
/// An already in-flight call started doing work.
///
/// For synchronous functions, open should always be followed immediately by enter, exit and close,
/// but for asynchronous functions, work can suspend (exiting the span without closing it), and then
/// later resume (entering the span again without opening it).
///
/// The timer for a span only starts when the span is entered.
SpanEnter(SpanEnter),
/// An in-flight call suspended and paused work.
///
/// For synchronous functions, exit should always be followed immediately by close,
/// but for asynchronous functions, work can suspend and then later resume.
///
/// The timer for a span pauses when the span is exited.
SpanExit(SpanExit),
/// A call ended
SpanClose(SpanClose),
/// An event occurred
Event(Event),
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct SpanId(u64);
impl From<&TracingId> for SpanId {
fn from(value: &TracingId) -> Self {
Self(value.into_u64())
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct NewCallsite {
pub call_id: ResourceId,
pub name: Cow<'static, str>,
pub module_path: Option<Cow<'static, str>>,
pub file: Option<Cow<'static, str>>,
pub line: Option<u32>,
pub target: Cow<'static, str>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct NewThread {
pub thread_id: ResourceId,
pub name: Option<String>,
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub struct SpanEnter {
pub id: SpanId,
pub time: std::time::Duration,
pub memory: Option<MemoryStats>,
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub struct SpanExit {
pub id: SpanId,
pub time: std::time::Duration,
pub memory: Option<MemoryStats>,
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub struct Event {
pub call_id: ResourceId,
pub thread_id: ResourceId,
pub parent_id: Option<SpanId>,
pub time: std::time::Duration,
pub memory: Option<MemoryStats>,
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub struct NewSpan {
pub id: SpanId,
pub call_id: ResourceId,
pub parent_id: Option<SpanId>,
pub thread_id: ResourceId,
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub struct SpanClose {
pub id: SpanId,
pub time: std::time::Duration,
}
/// A struct with a memory allocation stat.
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
pub struct MemoryStats {
/// Resident set size, measured in bytes.
/// (same as VmRSS in /proc/<pid>/status).
pub resident: u64,
}
impl MemoryStats {
#[cfg(any(target_os = "linux", target_os = "macos"))]
pub fn fetch() -> Option<Self> {
use libproc::libproc::pid_rusage::{pidrusage, RUsageInfoV0};
match pidrusage(std::process::id() as i32) {
Ok(RUsageInfoV0 { ri_resident_size, .. }) => {
Some(MemoryStats { resident: ri_resident_size })
}
Err(_) => None, /* ignoring error to avoid spamming */
}
}
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
pub fn fetch() -> Option<Self> {
None
}
pub fn checked_sub(self, other: Self) -> Option<Self> {
Some(Self { resident: self.resident.checked_sub(other.resident)? })
}
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct ResourceId(pub(crate) usize);
impl ResourceId {
pub fn to_usize(self) -> usize {
self.0
}
}

View File

@ -0,0 +1,21 @@
#[derive(Debug)]
pub enum Error {
Json(serde_json::Error),
}
impl std::error::Error for Error {}
impl std::fmt::Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("error de/serializing trace entry:")?;
match self {
Error::Json(error) => std::fmt::Display::fmt(&error, f),
}
}
}
impl From<serde_json::Error> for Error {
fn from(value: serde_json::Error) -> Self {
Self::Json(value)
}
}

208
tracing-trace/src/layer.rs Normal file
View File

@ -0,0 +1,208 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::io::Write;
use std::ops::ControlFlow;
use std::sync::RwLock;
use tracing::span::{Attributes, Id as TracingId};
use tracing::{Metadata, Subscriber};
use tracing_subscriber::layer::Context;
use tracing_subscriber::Layer;
use crate::entry::{
Entry, Event, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter,
SpanExit, SpanId,
};
use crate::{Error, Trace, TraceWriter};
/// Layer that measures the time spent in spans.
pub struct TraceLayer {
sender: tokio::sync::mpsc::UnboundedSender<Entry>,
callsites: RwLock<HashMap<OpaqueIdentifier, ResourceId>>,
start_time: std::time::Instant,
profile_memory: bool,
}
impl Trace {
pub fn new(profile_memory: bool) -> (Self, TraceLayer) {
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
let trace = Trace { receiver };
let layer = TraceLayer {
sender,
callsites: Default::default(),
start_time: std::time::Instant::now(),
profile_memory,
};
(trace, layer)
}
}
impl<W: Write> TraceWriter<W> {
pub fn new(writer: W, profile_memory: bool) -> (Self, TraceLayer) {
let (trace, layer) = Trace::new(profile_memory);
(trace.into_writer(writer), layer)
}
pub async fn receive(&mut self) -> Result<ControlFlow<(), ()>, Error> {
let Some(entry) = self.receiver.recv().await else {
return Ok(ControlFlow::Break(()));
};
self.write(entry)?;
Ok(ControlFlow::Continue(()))
}
/// Panics if called from an asynchronous context
pub fn blocking_receive(&mut self) -> Result<ControlFlow<(), ()>, Error> {
let Some(entry) = self.receiver.blocking_recv() else {
return Ok(ControlFlow::Break(()));
};
self.write(entry)?;
Ok(ControlFlow::Continue(()))
}
pub fn write(&mut self, entry: Entry) -> Result<(), Error> {
Ok(serde_json::ser::to_writer(&mut self.writer, &entry)?)
}
pub fn try_receive(&mut self) -> Result<ControlFlow<(), ()>, Error> {
let Ok(entry) = self.receiver.try_recv() else {
return Ok(ControlFlow::Break(()));
};
self.write(entry)?;
Ok(ControlFlow::Continue(()))
}
pub fn flush(&mut self) -> Result<(), std::io::Error> {
self.writer.flush()
}
}
#[derive(PartialEq, Eq, Hash)]
enum OpaqueIdentifier {
Thread(std::thread::ThreadId),
Call(tracing::callsite::Identifier),
}
impl TraceLayer {
fn resource_id(&self, opaque: OpaqueIdentifier) -> Option<ResourceId> {
self.callsites.read().unwrap().get(&opaque).copied()
}
fn register_resource_id(&self, opaque: OpaqueIdentifier) -> ResourceId {
let mut map = self.callsites.write().unwrap();
let len = map.len();
*map.entry(opaque).or_insert(ResourceId(len))
}
fn elapsed(&self) -> std::time::Duration {
self.start_time.elapsed()
}
fn memory_stats(&self) -> Option<MemoryStats> {
if self.profile_memory {
MemoryStats::fetch()
} else {
None
}
}
fn send(&self, entry: Entry) {
// we never care that the other end hanged on us
let _ = self.sender.send(entry);
}
fn register_callsite(&self, metadata: &'static Metadata<'static>) -> ResourceId {
let call_id = self.register_resource_id(OpaqueIdentifier::Call(metadata.callsite()));
let module_path = metadata.module_path();
let file = metadata.file();
let line = metadata.line();
let name = metadata.name();
let target = metadata.target();
self.send(Entry::NewCallsite(NewCallsite {
call_id,
module_path: module_path.map(Cow::Borrowed),
file: file.map(Cow::Borrowed),
line,
name: Cow::Borrowed(name),
target: Cow::Borrowed(target),
}));
call_id
}
fn register_thread(&self) -> ResourceId {
let thread_id = std::thread::current().id();
let name = std::thread::current().name().map(ToOwned::to_owned);
let thread_id = self.register_resource_id(OpaqueIdentifier::Thread(thread_id));
self.send(Entry::NewThread(NewThread { thread_id, name }));
thread_id
}
}
impl<S> Layer<S> for TraceLayer
where
S: Subscriber,
{
fn on_new_span(&self, attrs: &Attributes<'_>, id: &TracingId, _ctx: Context<'_, S>) {
let call_id = self
.resource_id(OpaqueIdentifier::Call(attrs.metadata().callsite()))
.unwrap_or_else(|| self.register_callsite(attrs.metadata()));
let thread_id = self
.resource_id(OpaqueIdentifier::Thread(std::thread::current().id()))
.unwrap_or_else(|| self.register_thread());
let parent_id = attrs
.parent()
.cloned()
.or_else(|| tracing::Span::current().id())
.map(|id| SpanId::from(&id));
self.send(Entry::NewSpan(NewSpan { id: id.into(), call_id, parent_id, thread_id }));
}
fn on_enter(&self, id: &TracingId, _ctx: Context<'_, S>) {
self.send(Entry::SpanEnter(SpanEnter {
id: id.into(),
time: self.elapsed(),
memory: self.memory_stats(),
}))
}
fn on_exit(&self, id: &TracingId, _ctx: Context<'_, S>) {
self.send(Entry::SpanExit(SpanExit {
id: id.into(),
time: self.elapsed(),
memory: self.memory_stats(),
}))
}
fn on_event(&self, event: &tracing::Event<'_>, _ctx: Context<'_, S>) {
let call_id = self
.resource_id(OpaqueIdentifier::Call(event.metadata().callsite()))
.unwrap_or_else(|| self.register_callsite(event.metadata()));
let thread_id = self
.resource_id(OpaqueIdentifier::Thread(std::thread::current().id()))
.unwrap_or_else(|| self.register_thread());
let parent_id = event
.parent()
.cloned()
.or_else(|| tracing::Span::current().id())
.map(|id| SpanId::from(&id));
self.send(Entry::Event(Event {
call_id,
thread_id,
parent_id,
time: self.elapsed(),
memory: self.memory_stats(),
}))
}
fn on_close(&self, id: TracingId, _ctx: Context<'_, S>) {
self.send(Entry::SpanClose(SpanClose { id: Into::into(&id), time: self.elapsed() }))
}
}

54
tracing-trace/src/lib.rs Normal file
View File

@ -0,0 +1,54 @@
use std::io::{Read, Write};
use entry::Entry;
pub mod entry;
mod error;
pub mod layer;
pub mod processor;
pub use error::Error;
pub struct TraceWriter<W: Write> {
writer: W,
receiver: tokio::sync::mpsc::UnboundedReceiver<Entry>,
}
pub struct Trace {
receiver: tokio::sync::mpsc::UnboundedReceiver<Entry>,
}
impl Trace {
pub fn into_receiver(self) -> tokio::sync::mpsc::UnboundedReceiver<Entry> {
self.receiver
}
pub fn into_writer<W: Write>(self, writer: W) -> TraceWriter<W> {
TraceWriter { writer, receiver: self.receiver }
}
}
pub struct TraceReader<R: Read> {
reader: R,
}
impl<R: Read> TraceReader<R> {
pub fn new(reader: R) -> Self {
Self { reader }
}
fn read(&mut self) -> Option<Result<Entry, Error>> {
serde_json::Deserializer::from_reader(&mut self.reader)
.into_iter()
.next()
.map(|res| res.map_err(Into::into))
}
}
impl<R: Read> Iterator for TraceReader<R> {
type Item = Result<Entry, Error>;
fn next(&mut self) -> Option<Self::Item> {
self.read()
}
}

122
tracing-trace/src/main.rs Normal file
View File

@ -0,0 +1,122 @@
use tracing::{instrument, Span};
use tracing_error::{ErrorLayer, InstrumentResult, SpanTrace, TracedError};
#[instrument(level = "trace", target = "profile::indexing")]
fn foo() -> Result<(), TracedError<Error>> {
let _ = bar(40, 2);
bar(40, 2)
}
#[derive(Debug)]
pub enum Error {
XTooBig,
}
impl std::fmt::Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("x too big")
}
}
impl std::error::Error for Error {}
#[instrument(level = "trace", target = "profile::indexing")]
fn bar(x: u32, y: u32) -> Result<(), TracedError<Error>> {
let handle_ok = spawn_in_current_scope(move || baz(y));
let handle = spawn_in_current_scope(move || baz(x + y));
handle_ok.join().unwrap().and(handle.join().unwrap())
}
pub fn spawn_in_current_scope<F, T>(f: F) -> std::thread::JoinHandle<T>
where
F: FnOnce() -> T + Send + 'static,
T: Send + 'static,
{
let current = Span::current();
std::thread::spawn(move || {
let span = tracing::trace_span!(parent: &current, "thread_spawn", id = ?std::thread::current().id(), name = tracing::field::Empty);
if let Some(name) = std::thread::current().name() {
span.record("name", name);
}
span.in_scope(f)
})
}
#[instrument(level = "trace", target = "profile::indexing")]
fn baz(x: u32) -> Result<(), TracedError<Error>> {
if x > 10 {
fibo_recursive(10);
return Err(Error::XTooBig).in_current_span();
}
Ok(())
}
#[instrument(level = "trace", target = "profile::indexing")]
fn fibo_recursive(n: u32) -> u32 {
if n == 0 {
return 1;
}
if n == 1 {
return 2;
}
return fibo_recursive(n - 1) - fibo_recursive(n - 2);
}
use tracing_error::ExtractSpanTrace as _;
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_trace::processor;
fn on_panic(info: &std::panic::PanicInfo) {
let info = info.to_string();
let trace = SpanTrace::capture();
tracing::error!(%info, %trace);
}
fn main() {
let (mut trace, profiling_layer) =
tracing_trace::TraceWriter::new(std::fs::File::create("trace.json").unwrap(), true);
let subscriber = tracing_subscriber::registry()
// any number of other subscriber layers may be added before or
// after the `ErrorLayer`...
.with(ErrorLayer::default())
.with(profiling_layer);
// set the subscriber as the default for the application
tracing::subscriber::set_global_default(subscriber).unwrap();
std::panic::set_hook(Box::new(on_panic));
let res = foo();
if let Err(error) = res {
print_extracted_spantraces(&error)
}
while trace.try_receive().unwrap().is_continue() {}
trace.flush().unwrap();
let trace = tracing_trace::TraceReader::new(std::fs::File::open("trace.json").unwrap());
let profile = processor::firefox_profiler::to_firefox_profile(trace, "test").unwrap();
serde_json::to_writer(std::fs::File::create("processed.json").unwrap(), &profile).unwrap();
}
fn print_extracted_spantraces(error: &(dyn std::error::Error + 'static)) {
let mut error = Some(error);
let mut ind = 0;
eprintln!("Error:");
while let Some(err) = error {
if let Some(spantrace) = err.span_trace() {
eprintln!("found a spantrace:\n{}", color_spantrace::colorize(spantrace));
} else {
eprintln!("{:>4}: {}", ind, err);
}
error = err.source();
ind += 1;
}
}

View File

@ -0,0 +1,450 @@
use std::collections::HashMap;
use fxprof_processed_profile::{
CategoryPairHandle, CounterHandle, CpuDelta, Frame, FrameFlags, FrameInfo, MarkerDynamicField,
MarkerFieldFormat, MarkerLocation, MarkerSchema, MarkerSchemaField, ProcessHandle, Profile,
ProfilerMarker, ReferenceTimestamp, SamplingInterval, StringHandle, Timestamp,
};
use serde_json::json;
use crate::entry::{
Entry, Event, MemoryStats, NewCallsite, NewSpan, ResourceId, SpanClose, SpanEnter, SpanExit,
SpanId,
};
use crate::{Error, TraceReader};
pub fn to_firefox_profile<R: std::io::Read>(
trace: TraceReader<R>,
app: &str,
) -> Result<Profile, Error> {
let mut profile = Profile::new(
app,
ReferenceTimestamp::from_millis_since_unix_epoch(0.0),
SamplingInterval::from_nanos(15),
);
let mut last_timestamp = Timestamp::from_nanos_since_reference(0);
let main = profile.add_process(app, 0, last_timestamp);
let mut calls = HashMap::new();
let mut threads = HashMap::new();
let mut spans = HashMap::new();
let category = profile.add_category("general", fxprof_processed_profile::CategoryColor::Blue);
let subcategory = profile.add_subcategory(category, "subcategory");
let mut last_memory = MemoryStats::default();
let mut memory_counters = None;
for entry in trace {
let entry = entry?;
match entry {
Entry::NewCallsite(callsite) => {
let string_handle = profile.intern_string(callsite.name.as_ref());
calls.insert(callsite.call_id, (callsite, string_handle));
}
Entry::NewThread(thread) => {
let thread_handle = profile.add_thread(
main,
thread.thread_id.to_usize() as u32,
last_timestamp,
threads.is_empty(),
);
if let Some(name) = &thread.name {
profile.set_thread_name(thread_handle, name)
}
threads.insert(thread.thread_id, thread_handle);
}
Entry::NewSpan(span) => {
spans.insert(span.id, (span, SpanStatus::Outside));
}
Entry::SpanEnter(SpanEnter { id, time, memory }) => {
let (_span, status) = spans.get_mut(&id).unwrap();
let SpanStatus::Outside = status else {
continue;
};
*status = SpanStatus::Inside { time, memory };
last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64);
add_memory_samples(
&mut profile,
main,
memory,
last_timestamp,
&mut memory_counters,
&mut last_memory,
);
}
Entry::SpanExit(SpanExit { id, time, memory }) => {
let (span, status) = spans.get_mut(&id).unwrap();
let SpanStatus::Inside { time: begin, memory: begin_memory } = status else {
continue;
};
last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64);
let begin = *begin;
let begin_memory = *begin_memory;
*status = SpanStatus::Outside;
let span = *span;
let thread_handle = threads.get(&span.thread_id).unwrap();
let frames = make_frames(span, &spans, &calls, subcategory);
profile.add_sample(
*thread_handle,
to_timestamp(begin),
frames.iter().rev().cloned(),
CpuDelta::ZERO,
1,
);
profile.add_sample(
*thread_handle,
to_timestamp(time),
frames.iter().rev().cloned(),
CpuDelta::from_nanos((time - begin).as_nanos() as u64),
1,
);
add_memory_samples(
&mut profile,
main,
memory,
last_timestamp,
&mut memory_counters,
&mut last_memory,
);
let (callsite, _) = calls.get(&span.call_id).unwrap();
let memory_delta =
begin_memory.zip(memory).and_then(|(begin, end)| end.checked_sub(begin));
let marker = SpanMarker { callsite, span: &span, memory_delta };
profile.add_marker_with_stack(
*thread_handle,
&callsite.name,
marker,
fxprof_processed_profile::MarkerTiming::Interval(
to_timestamp(begin),
to_timestamp(time),
),
frames.iter().rev().cloned(),
)
}
Entry::Event(event) => {
let span = event
.parent_id
.as_ref()
.and_then(|parent_id| spans.get(parent_id))
.and_then(|(span, status)| match status {
SpanStatus::Outside => None,
SpanStatus::Inside { .. } => Some(span),
})
.copied();
let timestamp = to_timestamp(event.time);
let thread_handle = threads.get(&event.thread_id).unwrap();
let frames = span
.map(|span| make_frames(span, &spans, &calls, subcategory))
.unwrap_or_default();
profile.add_sample(
*thread_handle,
timestamp,
frames.iter().rev().cloned(),
CpuDelta::ZERO,
1,
);
let memory_delta = add_memory_samples(
&mut profile,
main,
event.memory,
last_timestamp,
&mut memory_counters,
&mut last_memory,
);
let (callsite, _) = calls.get(&event.call_id).unwrap();
let marker = EventMarker { callsite, event: &event, memory_delta };
profile.add_marker_with_stack(
*thread_handle,
&callsite.name,
marker,
fxprof_processed_profile::MarkerTiming::Instant(timestamp),
frames.iter().rev().cloned(),
);
last_timestamp = timestamp;
}
Entry::SpanClose(SpanClose { id, time }) => {
spans.remove(&id);
last_timestamp = to_timestamp(time);
}
}
}
Ok(profile)
}
struct MemoryCounterHandles {
usage: CounterHandle,
}
impl MemoryCounterHandles {
fn new(profile: &mut Profile, main: ProcessHandle) -> Self {
let usage =
profile.add_counter(main, "mimmalloc", "Memory", "Amount of memory currently in use");
Self { usage }
}
}
fn add_memory_samples(
profile: &mut Profile,
main: ProcessHandle,
memory: Option<MemoryStats>,
last_timestamp: Timestamp,
memory_counters: &mut Option<MemoryCounterHandles>,
last_memory: &mut MemoryStats,
) -> Option<MemoryStats> {
let Some(stats) = memory else {
return None;
};
let memory_counters =
memory_counters.get_or_insert_with(|| MemoryCounterHandles::new(profile, main));
profile.add_counter_sample(
memory_counters.usage,
last_timestamp,
stats.resident as f64 - last_memory.resident as f64,
0,
);
let delta = stats.checked_sub(*last_memory);
*last_memory = stats;
delta
}
fn to_timestamp(time: std::time::Duration) -> Timestamp {
Timestamp::from_nanos_since_reference(time.as_nanos() as u64)
}
fn make_frames(
span: NewSpan,
spans: &HashMap<SpanId, (NewSpan, SpanStatus)>,
calls: &HashMap<ResourceId, (NewCallsite, StringHandle)>,
subcategory: CategoryPairHandle,
) -> Vec<FrameInfo> {
let mut frames = Vec::new();
let mut current_span = span;
loop {
let frame = make_frame(current_span, calls, subcategory);
frames.push(frame);
if let Some(parent) = current_span.parent_id {
current_span = spans.get(&parent).unwrap().0;
} else {
break;
}
}
frames
}
fn make_frame(
span: NewSpan,
calls: &HashMap<ResourceId, (NewCallsite, StringHandle)>,
subcategory: CategoryPairHandle,
) -> FrameInfo {
let (_, call) = calls.get(&span.call_id).unwrap();
FrameInfo { frame: Frame::Label(*call), category_pair: subcategory, flags: FrameFlags::empty() }
}
#[derive(Debug, Clone, Copy)]
enum SpanStatus {
Outside,
Inside { time: std::time::Duration, memory: Option<MemoryStats> },
}
struct SpanMarker<'a> {
span: &'a NewSpan,
callsite: &'a NewCallsite,
memory_delta: Option<MemoryStats>,
}
impl<'a> ProfilerMarker for SpanMarker<'a> {
const MARKER_TYPE_NAME: &'static str = "span";
fn schema() -> MarkerSchema {
let fields = vec![
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "filename",
label: "File name",
format: MarkerFieldFormat::FilePath,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "line",
label: "Line",
format: MarkerFieldFormat::Integer,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "module_path",
label: "Module path",
format: MarkerFieldFormat::String,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "span_id",
label: "Span ID",
format: MarkerFieldFormat::Integer,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "thread_id",
label: "Thread ID",
format: MarkerFieldFormat::Integer,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "resident",
label: "Resident set size, measured in bytes while this function was executing",
format: MarkerFieldFormat::Bytes,
searchable: false,
}),
];
MarkerSchema {
type_name: Self::MARKER_TYPE_NAME,
locations: vec![
MarkerLocation::MarkerTable,
MarkerLocation::MarkerChart,
MarkerLocation::TimelineOverview,
],
chart_label: None,
tooltip_label: Some("{marker.name} - {marker.data.filename}:{marker.data.line}"),
table_label: Some("{marker.data.filename}:{marker.data.line}"),
fields,
}
}
fn json_marker_data(&self) -> serde_json::Value {
let filename = self.callsite.file.as_deref();
let line = self.callsite.line;
let module_path = self.callsite.module_path.as_deref();
let span_id = self.span.id;
let thread_id = self.span.thread_id;
let mut value = json!({
"type": Self::MARKER_TYPE_NAME,
"filename": filename,
"line": line,
"module_path": module_path,
"span_id": span_id,
"thread_id": thread_id,
});
if let Some(MemoryStats { resident }) = self.memory_delta {
value["resident"] = json!(resident);
}
value
}
}
struct EventMarker<'a> {
event: &'a Event,
callsite: &'a NewCallsite,
memory_delta: Option<MemoryStats>,
}
impl<'a> ProfilerMarker for EventMarker<'a> {
const MARKER_TYPE_NAME: &'static str = "tracing-event";
fn schema() -> MarkerSchema {
let fields = vec![
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "filename",
label: "File name",
format: MarkerFieldFormat::FilePath,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "line",
label: "Line",
format: MarkerFieldFormat::Integer,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "module_path",
label: "Module path",
format: MarkerFieldFormat::String,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "parent_span_id",
label: "Parent Span ID",
format: MarkerFieldFormat::Integer,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "thread_id",
label: "Thread ID",
format: MarkerFieldFormat::Integer,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "resident",
label: "Resident set size, measured in bytes while this function was executing",
format: MarkerFieldFormat::Bytes,
searchable: false,
}),
];
MarkerSchema {
type_name: Self::MARKER_TYPE_NAME,
locations: vec![
MarkerLocation::MarkerTable,
MarkerLocation::MarkerChart,
MarkerLocation::TimelineOverview,
],
chart_label: None,
tooltip_label: Some("{marker.name} - {marker.data.filename}:{marker.data.line}"),
table_label: Some("{marker.data.filename}:{marker.data.line}"),
fields,
}
}
fn json_marker_data(&self) -> serde_json::Value {
let filename = self.callsite.file.as_deref();
let line = self.callsite.line;
let module_path = self.callsite.module_path.as_deref();
let span_id = self.event.parent_id;
let thread_id = self.event.thread_id;
let mut value = json!({
"type": Self::MARKER_TYPE_NAME,
"filename": filename,
"line": line,
"module_path": module_path,
"parent_span_id": span_id,
"thread_id": thread_id,
});
if let Some(MemoryStats { resident }) = self.memory_delta {
value["resident"] = json!(resident);
}
value
}
}

Some files were not shown because too many files have changed in this diff Show More