diff --git a/.github/workflows/bench-manual.yml b/.github/workflows/bench-manual.yml index 3f2b67d52..09699d94f 100644 --- a/.github/workflows/bench-manual.yml +++ b/.github/workflows/bench-manual.yml @@ -18,7 +18,7 @@ jobs: timeout-minutes: 180 # 3h steps: - uses: actions/checkout@v3 - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 with: profile: minimal diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml index 632c86d4e..1bcf16bfc 100644 --- a/.github/workflows/bench-pr.yml +++ b/.github/workflows/bench-pr.yml @@ -66,7 +66,7 @@ jobs: fetch-depth: 0 # fetch full history to be able to get main commit sha ref: ${{ steps.comment-branch.outputs.head_ref }} - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 with: profile: minimal diff --git a/.github/workflows/bench-push-indexing.yml b/.github/workflows/bench-push-indexing.yml index 0d9975eb7..0fca05f24 100644 --- a/.github/workflows/bench-push-indexing.yml +++ b/.github/workflows/bench-push-indexing.yml @@ -12,7 +12,7 @@ jobs: timeout-minutes: 180 # 3h steps: - uses: actions/checkout@v3 - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 with: profile: minimal diff --git a/.github/workflows/benchmarks-manual.yml b/.github/workflows/benchmarks-manual.yml index 14b77c83d..044f8a827 100644 --- a/.github/workflows/benchmarks-manual.yml +++ b/.github/workflows/benchmarks-manual.yml @@ -18,7 +18,7 @@ jobs: timeout-minutes: 4320 # 72h steps: - uses: actions/checkout@v3 - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 with: profile: minimal diff --git a/.github/workflows/benchmarks-pr.yml b/.github/workflows/benchmarks-pr.yml index 7c081932a..78f27541c 100644 --- a/.github/workflows/benchmarks-pr.yml +++ b/.github/workflows/benchmarks-pr.yml @@ -44,7 +44,7 @@ jobs: exit 1 fi - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 with: profile: minimal diff --git 
a/.github/workflows/benchmarks-push-indexing.yml b/.github/workflows/benchmarks-push-indexing.yml index 4495b4b9d..0144e20cf 100644 --- a/.github/workflows/benchmarks-push-indexing.yml +++ b/.github/workflows/benchmarks-push-indexing.yml @@ -16,7 +16,7 @@ jobs: timeout-minutes: 4320 # 72h steps: - uses: actions/checkout@v3 - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 with: profile: minimal diff --git a/.github/workflows/benchmarks-push-search-geo.yml b/.github/workflows/benchmarks-push-search-geo.yml index 22218cd6e..cce6cb9b9 100644 --- a/.github/workflows/benchmarks-push-search-geo.yml +++ b/.github/workflows/benchmarks-push-search-geo.yml @@ -15,7 +15,7 @@ jobs: runs-on: benchmarks steps: - uses: actions/checkout@v3 - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 with: profile: minimal diff --git a/.github/workflows/benchmarks-push-search-songs.yml b/.github/workflows/benchmarks-push-search-songs.yml index e9744a434..2ba584a69 100644 --- a/.github/workflows/benchmarks-push-search-songs.yml +++ b/.github/workflows/benchmarks-push-search-songs.yml @@ -15,7 +15,7 @@ jobs: runs-on: benchmarks steps: - uses: actions/checkout@v3 - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 with: profile: minimal diff --git a/.github/workflows/benchmarks-push-search-wiki.yml b/.github/workflows/benchmarks-push-search-wiki.yml index bc9e1bcd0..2436cc356 100644 --- a/.github/workflows/benchmarks-push-search-wiki.yml +++ b/.github/workflows/benchmarks-push-search-wiki.yml @@ -15,7 +15,7 @@ jobs: runs-on: benchmarks steps: - uses: actions/checkout@v3 - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 with: profile: minimal diff --git a/.github/workflows/flaky-tests.yml b/.github/workflows/flaky-tests.yml index 3fa9c549c..530767387 100644 --- a/.github/workflows/flaky-tests.yml +++ b/.github/workflows/flaky-tests.yml @@ -17,7 +17,7 @@ jobs: run: | apt-get update && apt-get 
install -y curl apt-get install build-essential -y - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 - name: Install cargo-flaky run: cargo install cargo-flaky - name: Run cargo flaky in the dumps diff --git a/.github/workflows/fuzzer-indexing.yml b/.github/workflows/fuzzer-indexing.yml index ad0962802..5da7f73ed 100644 --- a/.github/workflows/fuzzer-indexing.yml +++ b/.github/workflows/fuzzer-indexing.yml @@ -12,7 +12,7 @@ jobs: timeout-minutes: 4320 # 72h steps: - uses: actions/checkout@v3 - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 with: profile: minimal diff --git a/.github/workflows/publish-apt-brew-pkg.yml b/.github/workflows/publish-apt-brew-pkg.yml index 546ec1bee..143d3e7f4 100644 --- a/.github/workflows/publish-apt-brew-pkg.yml +++ b/.github/workflows/publish-apt-brew-pkg.yml @@ -25,7 +25,7 @@ jobs: run: | apt-get update && apt-get install -y curl apt-get install build-essential -y - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 - name: Install cargo-deb run: cargo install cargo-deb - uses: actions/checkout@v3 diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index c53946fea..fe0f95474 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -45,7 +45,7 @@ jobs: run: | apt-get update && apt-get install -y curl apt-get install build-essential -y - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 - name: Build run: cargo build --release --locked # No need to upload binaries for dry run (cron) @@ -75,7 +75,7 @@ jobs: asset_name: meilisearch-windows-amd64.exe steps: - uses: actions/checkout@v3 - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 - name: Build run: cargo build --release --locked # No need to upload binaries for dry run (cron) @@ -101,7 +101,7 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 - name: Installing Rust 
toolchain - uses: dtolnay/rust-toolchain@1.79 + uses: dtolnay/rust-toolchain@1.81 with: profile: minimal target: ${{ matrix.target }} @@ -148,7 +148,7 @@ jobs: add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" apt-get update -y && apt-get install -y docker-ce - name: Installing Rust toolchain - uses: dtolnay/rust-toolchain@1.79 + uses: dtolnay/rust-toolchain@1.81 with: profile: minimal target: ${{ matrix.target }} diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index e56125ebf..fae93bd66 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -31,7 +31,7 @@ jobs: apt-get update && apt-get install -y curl apt-get install build-essential -y - name: Setup test with Rust stable - uses: dtolnay/rust-toolchain@1.79 + uses: dtolnay/rust-toolchain@1.81 - name: Cache dependencies uses: Swatinem/rust-cache@v2.7.7 - name: Run cargo check without any default features @@ -56,7 +56,7 @@ jobs: - uses: actions/checkout@v3 - name: Cache dependencies uses: Swatinem/rust-cache@v2.7.7 - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 - name: Run cargo check without any default features uses: actions-rs/cargo@v1 with: @@ -81,7 +81,7 @@ jobs: run: | apt-get update apt-get install --assume-yes build-essential curl - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 - name: Run cargo build with almost all features run: | cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)" @@ -101,7 +101,7 @@ jobs: run: | apt-get update apt-get install --assume-yes build-essential curl - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 - name: Run cargo tree without default features and check lindera is not present run: | if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then @@ -125,7 +125,7 @@ jobs: run: | 
apt-get update && apt-get install -y curl apt-get install build-essential -y - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 - name: Cache dependencies uses: Swatinem/rust-cache@v2.7.7 - name: Run tests in debug @@ -139,7 +139,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 with: profile: minimal components: clippy @@ -156,7 +156,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 with: profile: minimal toolchain: nightly-2024-07-09 diff --git a/.github/workflows/update-cargo-toml-version.yml b/.github/workflows/update-cargo-toml-version.yml index d9d79d595..cda76e6bb 100644 --- a/.github/workflows/update-cargo-toml-version.yml +++ b/.github/workflows/update-cargo-toml-version.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: dtolnay/rust-toolchain@1.79 + - uses: dtolnay/rust-toolchain@1.81 with: profile: minimal - name: Install sd diff --git a/.gitignore b/.gitignore index 0d6750008..07453a58f 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ /dumps /bench /_xtask_benchmark.ms +/benchmarks # Snapshots ## ... 
large diff --git a/Cargo.lock b/Cargo.lock index 91c83fb13..172a67806 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -36,9 +36,9 @@ dependencies = [ [[package]] name = "actix-http" -version = "3.8.0" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae682f693a9cd7b058f2b0b5d9a6d7728a8555779bedbbc35dd88528611d020" +checksum = "d48f96fc3003717aeb9856ca3d02a8c7de502667ad76eeacd830b48d2e91fac4" dependencies = [ "actix-codec", "actix-rt", @@ -119,7 +119,7 @@ dependencies = [ "actix-utils", "futures-core", "futures-util", - "mio", + "mio 0.8.11", "num_cpus", "socket2 0.4.9", "tokio", @@ -168,9 +168,9 @@ dependencies = [ [[package]] name = "actix-web" -version = "4.8.0" +version = "4.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1988c02af8d2b718c05bc4aeb6a66395b7cdf32858c2c71131e5637a8c05a9ff" +checksum = "9180d76e5cc7ccbc4d60a506f2c727730b154010262df5b910eb17dbe4b8cb38" dependencies = [ "actix-codec", "actix-http", @@ -191,6 +191,7 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", + "impl-more", "itoa", "language-tags", "log", @@ -234,6 +235,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + [[package]] name = "aes" version = "0.8.4" @@ -296,9 +303,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.18" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] name = "anes" @@ -322,9 +329,9 @@ dependencies = [ [[package]] name = "anstyle" -version = 
"1.0.6" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "anstyle-parse" @@ -356,9 +363,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" dependencies = [ "backtrace", ] @@ -371,9 +378,9 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c" [[package]] name = "arbitrary" -version = "1.3.2" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" +checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" dependencies = [ "derive_arbitrary", ] @@ -401,7 +408,7 @@ dependencies = [ "rayon", "roaring", "tempfile", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -420,7 +427,7 @@ dependencies = [ "rayon", "roaring", "tempfile", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -435,9 +442,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.81" +version = "0.1.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" +checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" dependencies = [ "proc-macro2", "quote", @@ -466,7 +473,7 @@ dependencies = [ "cc", "cfg-if", "libc", - "miniz_oxide", + "miniz_oxide 0.7.2", "object", "rustc-demangle", ] @@ -496,7 +503,7 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2 [[package]] name = "benchmarks" -version = "1.12.0" +version = "1.12.2" 
dependencies = [ "anyhow", "bumpalo", @@ -542,16 +549,14 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.69.4" +version = "0.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f" dependencies = [ "bitflags 2.6.0", "cexpr", "clang-sys", - "itertools 0.12.1", - "lazy_static", - "lazycell", + "itertools 0.13.0", "proc-macro2", "quote", "regex", @@ -678,9 +683,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.9.1" +version = "1.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" +checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0" dependencies = [ "memchr", "regex-automata", @@ -689,7 +694,7 @@ dependencies = [ [[package]] name = "build-info" -version = "1.12.0" +version = "1.12.2" dependencies = [ "anyhow", "time", @@ -715,16 +720,16 @@ dependencies = [ "allocator-api2", "bitpacking", "bumpalo", - "hashbrown 0.15.1", + "hashbrown 0.15.2", "serde", "serde_json", ] [[package]] name = "byte-unit" -version = "5.1.4" +version = "5.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ac19bdf0b2665407c39d82dbc937e951e7e2001609f0fb32edd0af45a2d63e" +checksum = "e1cd29c3c585209b0cbc7309bfe3ed7efd8c84c21b7af29c8bfae908f8777174" dependencies = [ "rust_decimal", "serde", @@ -761,9 +766,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" [[package]] name = "bytemuck" -version = "1.19.0" +version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d" +checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3" dependencies = [ "bytemuck_derive", ] @@ -787,9 +792,9 @@ checksum = 
"1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.6.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" [[package]] name = "bytestring" @@ -832,15 +837,15 @@ dependencies = [ [[package]] name = "candle-core" -version = "0.6.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5b18de020c2729dbf7ac390325312644808b6ba9b7962f1f724e9185b1d53c7" +checksum = "855dfedff437d2681d68e1f34ae559d88b0dd84aa5a6b63f2c8e75ebdd875bbf" dependencies = [ "byteorder", "candle-kernels", "cudarc", "gemm", - "half 2.4.0", + "half 2.4.1", "memmap2", "num-traits", "num_cpus", @@ -848,45 +853,47 @@ dependencies = [ "rand_distr", "rayon", "safetensors", - "thiserror", + "thiserror 1.0.69", + "ug", + "ug-cuda", "yoke", "zip 1.1.4", ] [[package]] name = "candle-kernels" -version = "0.6.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bc0a71be8b2f0950b63fd602a5e10a74a4f94a5fd63059ae455e96163389488" +checksum = "53343628fa470b7075c28c589b98735b4220b464e37ddbb8e117040e199f4787" dependencies = [ "bindgen_cuda", ] [[package]] name = "candle-nn" -version = "0.6.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b006b30f66a0d94fc9cef0ac4de6ce510565f35ae2c6c35ce5d4aacfb0fc8eeb" +checksum = "ddd3c6b2ee0dfd64af12ae5b07e4b7c517898981cdaeffcb10b71d7dd5c8f359" dependencies = [ "candle-core", - "half 2.4.0", + "half 2.4.1", "num-traits", "rayon", "safetensors", "serde", - "thiserror", + "thiserror 1.0.69", ] [[package]] name = "candle-transformers" -version = "0.6.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4f0d4eb6a0d9279d5829b06b2bf3caa117904eefd6dcf879d16e687c4a84034c" +checksum = "4270cc692c4a3df2051c2e8c3c4da3a189746af7ca3a547b99ecd335582b92e1" dependencies = [ "byteorder", "candle-core", "candle-nn", - "fancy-regex 0.13.0", + "fancy-regex", "num-traits", "rand", "rayon", @@ -907,23 +914,23 @@ dependencies = [ [[package]] name = "cargo_metadata" -version = "0.18.1" +version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d886547e41f740c616ae73108f6eb70afe6d940c7bc697cb30f13daec073037" +checksum = "8769706aad5d996120af43197bf46ef6ad0fda35216b4505f926a365a232d924" dependencies = [ "camino", "cargo-platform", "semver", "serde", "serde_json", - "thiserror", + "thiserror 2.0.9", ] [[package]] name = "cargo_toml" -version = "0.20.3" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4895c018bb228aa6b3ba1a0285543fcb4b704734c3fb1f72afaa75aa769500c1" +checksum = "5fbd1fe9db3ebf71b89060adaf7b0504c2d6a425cf061313099547e382c2e472" dependencies = [ "serde", "toml", @@ -1058,9 +1065,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.9" +version = "4.5.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64acc1846d54c1fe936a78dc189c34e28d3f5afc348403f28ecf53660b9b8462" +checksum = "9560b07a799281c7e0958b9296854d6fafd4c5f31444a7e5bb1ad6dde5ccf1bd" dependencies = [ "clap_builder", "clap_derive", @@ -1068,9 +1075,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.9" +version = "4.5.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb8393d67ba2e7bfaf28a23458e4e2b543cc73a99595511eb207fdb8aede942" +checksum = "874e0dd3eb68bf99058751ac9712f622e61e6f393a94f7128fa26e3f02f5c7cd" dependencies = [ "anstream", "anstyle", @@ -1080,9 +1087,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.8" +version = "4.5.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085" +checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -1092,9 +1099,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.1" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "color-spantrace" @@ -1192,9 +1199,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.4" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpufeatures" @@ -1267,9 +1274,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.13" +version = "0.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" dependencies = [ "crossbeam-utils", ] @@ -1326,9 +1333,9 @@ dependencies = [ [[package]] name = "csv" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" dependencies = [ "csv-core", "itoa", @@ -1347,11 +1354,11 @@ dependencies = [ [[package]] name = "cudarc" -version = "0.11.7" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56ee2a3fbbd981e1c7ea73cc2af136e754eb22d17436de37155227ee4dbe0cf4" +checksum = 
"8cd76de2aa3a7bdb9a65941ea5a3c688d941688f736a81b2fc5beb88747a7f25" dependencies = [ - "half 2.4.0", + "half 2.4.1", "libloading", ] @@ -1367,12 +1374,12 @@ dependencies = [ [[package]] name = "darling" -version = "0.20.9" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" dependencies = [ - "darling_core 0.20.9", - "darling_macro 0.20.9", + "darling_core 0.20.10", + "darling_macro 0.20.10", ] [[package]] @@ -1391,9 +1398,9 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.20.9" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" dependencies = [ "fnv", "ident_case", @@ -1416,11 +1423,11 @@ dependencies = [ [[package]] name = "darling_macro" -version = "0.20.9" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ - "darling_core 0.20.9", + "darling_core 0.20.10", "quote", "syn 2.0.87", ] @@ -1454,9 +1461,9 @@ dependencies = [ [[package]] name = "deflate64" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83ace6c86376be0b6cdcf3fb41882e81d94b31587573d1cfa9d01cd06bba210d" +checksum = "da692b8d1080ea3045efaab14434d40468c3d8657e42abddfffca87b428f4c1b" [[package]] name = "deranged" @@ -1470,9 +1477,9 @@ dependencies = [ [[package]] name = "derive_arbitrary" -version = "1.3.2" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" +checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" dependencies = [ "proc-macro2", "quote", @@ -1490,11 +1497,11 @@ dependencies = [ [[package]] name = "derive_builder" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" dependencies = [ - "derive_builder_macro 0.20.0", + "derive_builder_macro 0.20.2", ] [[package]] @@ -1511,11 +1518,11 @@ dependencies = [ [[package]] name = "derive_builder_core" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" dependencies = [ - "darling 0.20.9", + "darling 0.20.10", "proc-macro2", "quote", "syn 2.0.87", @@ -1533,11 +1540,11 @@ dependencies = [ [[package]] name = "derive_builder_macro" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ - "derive_builder_core 0.20.0", + "derive_builder_core 0.20.2", "syn 2.0.87", ] @@ -1556,9 +1563,9 @@ dependencies = [ [[package]] name = "deserr" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe971a2a48625fda3198032f35de60939828c4aed47d76715c21698801b985c" +checksum = "7a4da990b9de75d39b5fb69bb0105ec3f1726d43b71b3ddb359cf38470017a56" dependencies = [ "actix-http", "actix-utils", @@ -1573,9 +1580,9 @@ dependencies = [ [[package]] name = "deserr-internal" -version = "0.6.2" +version = "0.6.3" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aae89f00c97a75940185084a826c0aace055774ad57a58211625606449ea0bd8" +checksum = "aadef696fce456c704f10186def1bdc0a40e646c9f4f18cf091477acadb731d8" dependencies = [ "convert_case 0.6.0", "proc-macro2", @@ -1664,12 +1671,12 @@ dependencies = [ [[package]] name = "dump" -version = "1.12.0" +version = "1.12.2" dependencies = [ "anyhow", "big_s", "flate2", - "http 1.1.0", + "http 1.2.0", "maplit", "meili-snap", "meilisearch-types", @@ -1680,7 +1687,7 @@ dependencies = [ "serde_json", "tar", "tempfile", - "thiserror", + "thiserror 2.0.9", "time", "tracing", "uuid", @@ -1847,16 +1854,6 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" -[[package]] -name = "fancy-regex" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05" -dependencies = [ - "bit-set", - "regex", -] - [[package]] name = "fancy-regex" version = "0.13.0" @@ -1870,16 +1867,16 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "file-store" -version = "1.12.0" +version = "1.12.2" dependencies = [ "tempfile", - "thiserror", + "thiserror 2.0.9", "tracing", "uuid", ] @@ -1898,7 +1895,7 @@ dependencies = [ [[package]] name = "filter-parser" -version = "1.12.0" +version = "1.12.2" dependencies = [ "insta", "nom", @@ -1908,17 +1905,17 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.30" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" dependencies = [ "crc32fast", - "miniz_oxide", + "miniz_oxide 0.8.2", ] [[package]] name = "flatten-serde-json" -version = "1.12.0" +version = "1.12.2" dependencies = [ "criterion", "serde_json", @@ -1968,9 +1965,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "futures" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" dependencies = [ "futures-channel", "futures-core", @@ -1983,9 +1980,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", "futures-sink", @@ -1993,15 +1990,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] name = "futures-executor" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" dependencies = [ "futures-core", "futures-task", @@ -2010,15 +2007,15 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.30" +version = "0.3.31" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" [[package]] name = "futures-macro" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", @@ -2027,21 +2024,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-util" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ "futures-channel", "futures-core", @@ -2057,7 +2054,7 @@ dependencies = [ [[package]] name = "fuzzers" -version = "1.12.0" +version = "1.12.2" dependencies = [ "arbitrary", "bumpalo", @@ -2151,7 +2148,7 @@ checksum = "a2e7ea062c987abcd8db95db917b4ffb4ecdfd0668471d8dc54734fdff2354e8" dependencies = [ "bytemuck", "dyn-stack", - "half 2.4.0", + "half 2.4.1", "num-complex", "num-traits", "once_cell", @@ -2172,7 +2169,7 @@ dependencies = [ "dyn-stack", "gemm-common", "gemm-f32", - "half 2.4.0", + 
"half 2.4.1", "num-complex", "num-traits", "paste", @@ -2229,9 +2226,9 @@ checksum = "36d244a08113319b5ebcabad2b8b7925732d15eec46d7e7ac3c11734f3b7a6ad" [[package]] name = "getrandom" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "js-sys", @@ -2240,18 +2237,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "getset" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e45727250e75cc04ff2846a66397da8ef2b3db8e40e0cef4df67950a07621eb9" -dependencies = [ - "proc-macro-error", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "gimli" version = "0.27.3" @@ -2320,7 +2305,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http 1.1.0", + "http 1.2.0", "indexmap", "slab", "tokio", @@ -2336,9 +2321,9 @@ checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" [[package]] name = "half" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5eceaaeec696539ddaf7b333340f1af35a5aa87ae3e4f3ead0532f72affab2e" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ "bytemuck", "cfg-if", @@ -2378,9 +2363,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.1" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" dependencies = [ "allocator-api2", "equivalent", @@ -2412,9 +2397,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "heed" -version = "0.20.3" +version = "0.20.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bc30da4a93ff8cb98e535d595d6de42731d4719d707bc1c86f579158751a24e" +checksum = "7d4f449bab7320c56003d37732a917e18798e2f1709d80263face2b4f9436ddb" dependencies = [ "bitflags 2.6.0", "byteorder", @@ -2453,6 +2438,12 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + [[package]] name = "hex" version = "0.4.3" @@ -2465,13 +2456,13 @@ version = "0.3.2" source = "git+https://github.com/dureuill/hf-hub.git?branch=rust_tls#88d4f11cb9fa079f2912bacb96f5080b16825ce8" dependencies = [ "dirs", - "http 1.1.0", + "http 1.2.0", "indicatif", "log", "rand", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "ureq", ] @@ -2497,9 +2488,9 @@ dependencies = [ [[package]] name = "http" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" dependencies = [ "bytes", "fnv", @@ -2513,7 +2504,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" dependencies = [ "bytes", - "http 1.1.0", + "http 1.2.0", ] [[package]] @@ -2524,7 +2515,7 @@ checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body", "pin-project-lite", ] @@ -2551,7 +2542,7 @@ dependencies = [ "futures-channel", "futures-util", "h2 0.4.5", - "http 1.1.0", + "http 1.2.0", "http-body", "httparse", "httpdate", @@ -2569,7 +2560,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" dependencies = [ "futures-util", - "http 1.1.0", + "http 1.2.0", "hyper", "hyper-util", "rustls", @@ -2582,24 +2573,141 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.6" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ab92f4f49ee4fb4f997c784b7a2e0fa70050211e0b6a287f898c3c9785ca956" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" dependencies = [ "bytes", "futures-channel", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body", "hyper", "pin-project-lite", "socket2 0.5.5", "tokio", - "tower", "tower-service", "tracing", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -2608,12 +2716,23 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", ] [[package]] @@ -2624,7 +2743,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d" [[package]] name = "index-scheduler" -version = "1.12.0" +version = "1.12.2" dependencies = [ "anyhow", "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2635,7 +2754,7 @@ dependencies = [ "convert_case 0.6.0", "crossbeam-channel", "csv", - "derive_builder 0.20.0", + "derive_builder 0.20.2", "dump", "enum-iterator", "file-store", @@ -2653,7 +2772,7 @@ dependencies = [ "serde_json", "synchronoise", "tempfile", - "thiserror", + "thiserror 2.0.9", "time", "tracing", "ureq", @@ -2667,7 +2786,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" dependencies = [ "equivalent", - "hashbrown 0.15.1", + "hashbrown 0.15.2", "serde", ] @@ -2736,11 +2855,11 @@ dependencies = [ [[package]] name = "is-terminal" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" dependencies = [ - "hermit-abi", + "hermit-abi 0.4.0", "libc", "windows-sys 0.52.0", ] @@ -2781,6 +2900,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.14.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" @@ -2794,7 +2922,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1e2b0210dc78b49337af9e49d7ae41a39dceac6e5985613f1cf7763e2f76a25" dependencies = [ "cedarwood", - "derive_builder 0.20.0", + "derive_builder 0.20.2", "fxhash", "lazy_static", "phf", @@ -2822,7 +2950,7 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "1.12.0" +version = "1.12.2" dependencies = [ "criterion", "serde_json", @@ -2874,12 +3002,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "levenshtein_automata" version = "0.2.1" @@ -2891,9 +3013,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.164" +version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libgit2-sys" @@ -2909,12 +3031,12 @@ dependencies = [ [[package]] name = "libloading" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" +checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", - "windows-targets 0.52.4", + "windows-targets 0.52.6", ] [[package]] @@ -2935,9 +3057,9 @@ dependencies = [ [[package]] name = "libproc" -version = "0.14.8" +version = 
"0.14.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae9ea4b75e1a81675429dafe43441df1caea70081e82246a8cccf514884a88bb" +checksum = "e78a09b56be5adbcad5aa1197371688dc6bb249a26da3bca2011ee2fb987ebfb" dependencies = [ "bindgen", "errno", @@ -2992,7 +3114,7 @@ dependencies = [ "regex", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "unicode-blocks", "unicode-normalization", "unicode-segmentation", @@ -3062,7 +3184,7 @@ dependencies = [ "log", "once_cell", "serde", - "thiserror", + "thiserror 1.0.69", "yada", ] @@ -3112,7 +3234,7 @@ dependencies = [ "bincode", "byteorder", "csv", - "derive_builder 0.20.0", + "derive_builder 0.20.2", "encoding", "encoding_rs", "encoding_rs_io", @@ -3283,9 +3405,9 @@ checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "liquid" -version = "0.26.6" +version = "0.26.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10929f201279ba14da3297b957dcda1e0bf7a6f3bb5115688be684aa8864e9cc" +checksum = "7cdcc72b82748f47c2933c172313f5a9aea5b2c4eb3fa4c66b4ea55bb60bb4b1" dependencies = [ "doc-comment", "liquid-core", @@ -3296,12 +3418,12 @@ dependencies = [ [[package]] name = "liquid-core" -version = "0.26.6" +version = "0.26.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3aef4b2160791f456eb880c990a97746f693746f92302ef5f1d06111cf14b768" +checksum = "2752e978ffc53670f3f2e8b3ef09f348d6f7b5474a3be3f8a5befe5382e4effb" dependencies = [ "anymap2", - "itertools 0.12.1", + "itertools 0.13.0", "kstring", "liquid-derive", "num-traits", @@ -3314,9 +3436,9 @@ dependencies = [ [[package]] name = "liquid-derive" -version = "0.26.5" +version = "0.26.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915f6d0a2963a27cd5205c1902f32ddfe3bc035816afd268cf88c0fc0f8d287e" +checksum = "3b51f1d220e3fa869e24cfd75915efe3164bd09bb11b3165db3f37f57bf673e3" dependencies = [ "proc-macro2", "quote", @@ 
-3325,11 +3447,11 @@ dependencies = [ [[package]] name = "liquid-lib" -version = "0.26.6" +version = "0.26.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f48fc446873f74d869582f5c4b8cbf3248c93395e410a67af5809b3731e44a" +checksum = "59b1a298d3d2287ee5b1e43840d885b8fdfc37d3f4e90d82aacfd04d021618da" dependencies = [ - "itertools 0.12.1", + "itertools 0.13.0", "liquid-core", "once_cell", "percent-encoding", @@ -3339,10 +3461,16 @@ dependencies = [ ] [[package]] -name = "lmdb-master-sys" -version = "0.2.2" +name = "litemap" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57640c190703d5ccf4a86aff4aeb749b2d287a8cb1723c76b51f39d77ab53b24" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" + +[[package]] +name = "lmdb-master-sys" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "472c3760e2a8d0f61f322fb36788021bb36d573c502b50fa3e2bcaac3ec326c9" dependencies = [ "cc", "doxygen-rs", @@ -3441,7 +3569,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "meili-snap" -version = "1.12.0" +version = "1.12.2" dependencies = [ "insta", "md5", @@ -3450,7 +3578,7 @@ dependencies = [ [[package]] name = "meilisearch" -version = "1.12.0" +version = "1.12.2" dependencies = [ "actix-cors", "actix-http", @@ -3480,7 +3608,7 @@ dependencies = [ "indexmap", "insta", "is-terminal", - "itertools 0.13.0", + "itertools 0.14.0", "jsonwebtoken", "lazy_static", "manifest-dir-macros", @@ -3522,7 +3650,7 @@ dependencies = [ "temp-env", "tempfile", "termcolor", - "thiserror", + "thiserror 2.0.9", "time", "tokio", "toml", @@ -3532,15 +3660,17 @@ dependencies = [ "tracing-trace", "url", "urlencoding", + "utoipa", + "utoipa-scalar", "uuid", "wiremock", "yaup", - "zip 2.1.3", + "zip 2.2.2", ] [[package]] name = "meilisearch-auth" -version = "1.12.0" +version = "1.12.2" dependencies = [ "base64 0.22.1", 
"enum-iterator", @@ -3552,14 +3682,14 @@ dependencies = [ "serde", "serde_json", "sha2", - "thiserror", + "thiserror 2.0.9", "time", "uuid", ] [[package]] name = "meilisearch-types" -version = "1.12.0" +version = "1.12.2" dependencies = [ "actix-web", "anyhow", @@ -3584,15 +3714,16 @@ dependencies = [ "serde_json", "tar", "tempfile", - "thiserror", + "thiserror 2.0.9", "time", "tokio", + "utoipa", "uuid", ] [[package]] name = "meilitool" -version = "1.12.0" +version = "1.12.2" dependencies = [ "anyhow", "arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05)", @@ -3627,7 +3758,7 @@ dependencies = [ [[package]] name = "milli" -version = "1.12.0" +version = "1.12.2" dependencies = [ "allocator-api2", "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -3657,12 +3788,12 @@ dependencies = [ "fxhash", "geoutils", "grenad", - "hashbrown 0.15.1", + "hashbrown 0.15.2", "heed", "hf-hub", "indexmap", "insta", - "itertools 0.13.0", + "itertools 0.14.0", "json-depth-checker", "levenshtein_automata", "liquid", @@ -3689,7 +3820,7 @@ dependencies = [ "smallvec", "smartstring", "tempfile", - "thiserror", + "thiserror 2.0.9", "thread_local", "tiktoken-rs", "time", @@ -3698,6 +3829,7 @@ dependencies = [ "uell", "ureq", "url", + "utoipa", "uuid", ] @@ -3741,6 +3873,15 @@ dependencies = [ "adler", ] +[[package]] +name = "miniz_oxide" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ffbe83022cedc1d264172192511ae958937694cd57ce297164951b8b3568394" +dependencies = [ + "adler2", +] + [[package]] name = "mio" version = "0.8.11" @@ -3753,6 +3894,17 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "mio" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.52.0", +] + [[package]] name = "monostate" version 
= "0.1.9" @@ -3782,9 +3934,9 @@ checksum = "79b7f3e22167862cc7c95b21a6f326c22e4bf40da59cbf000b368a310173ba11" [[package]] name = "mutually_exclusive_features" -version = "0.0.3" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e" +checksum = "e94e1e6445d314f972ff7395df2de295fe51b71821694f0b0e1e79c4f12c8577" [[package]] name = "nohash" @@ -3833,21 +3985,34 @@ dependencies = [ ] [[package]] -name = "num-bigint" +name = "num" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ - "autocfg", "num-integer", "num-traits", ] [[package]] name = "num-complex" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" dependencies = [ "bytemuck", "num-traits", @@ -3861,19 +4026,40 @@ checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" [[package]] name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" dependencies = [ "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", "num-traits", ] [[package]] name = "num-traits" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", "libm", @@ -3885,7 +4071,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.9", "libc", ] @@ -3942,9 +4128,9 @@ checksum = "ae4512a8f418ac322335255a72361b9ac927e106f4d7fe6ab4d8ac59cb01f7a9" [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "onig" @@ -3982,9 +4168,9 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "ordered-float" -version = "4.2.1" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ff2cf528c6c03d9ed653d6c4ce1dc0582dc4af309790ad92f07c1cd551b0be" +checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" dependencies = [ "num-traits", ] @@ -4083,7 +4269,7 @@ checksum = 
"e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "permissive-json-pointer" -version = "1.12.0" +version = "1.12.2" dependencies = [ "big_s", "serde_json", @@ -4095,7 +4281,7 @@ version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1acb4a4365a13f749a93f1a094a7805e5cfa0955373a9de860d962eaa3a5fe5a" dependencies = [ - "thiserror", + "thiserror 1.0.69", "ucd-trie", ] @@ -4207,9 +4393,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.14" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] name = "pin-utils" @@ -4302,7 +4488,6 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn 1.0.109", "version_check", ] @@ -4363,7 +4548,7 @@ dependencies = [ "parking_lot", "procfs", "protobuf", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -4416,7 +4601,7 @@ dependencies = [ "quinn-udp", "rustc-hash 1.1.0", "rustls", - "thiserror", + "thiserror 1.0.69", "tokio", "tracing", ] @@ -4433,7 +4618,7 @@ dependencies = [ "rustc-hash 2.1.0", "rustls", "slab", - "thiserror", + "thiserror 1.0.69", "tinyvec", "tracing", ] @@ -4587,14 +4772,14 @@ checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" dependencies = [ "getrandom", "redox_syscall 0.2.16", - "thiserror", + "thiserror 1.0.69", ] [[package]] name = "regex" -version = "1.10.5" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -4604,9 +4789,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -4621,9 +4806,9 @@ checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rend" @@ -4636,16 +4821,16 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.5" +version = "0.12.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" +checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da" dependencies = [ "base64 0.22.1", "bytes", "futures-channel", "futures-core", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body", "http-body-util", "hyper", @@ -4669,6 +4854,7 @@ dependencies = [ "tokio", "tokio-rustls", "tokio-util", + "tower", "tower-service", "url", "wasm-bindgen", @@ -4676,7 +4862,7 @@ dependencies = [ "wasm-streams", "web-sys", "webpki-roots", - "winreg", + "windows-registry", ] [[package]] @@ -4752,9 +4938,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.10.7" +version = "0.10.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f81dc953b2244ddd5e7860cb0bb2a790494b898ef321d4aff8e260efab60cc88" +checksum = "a652edd001c53df0b3f96a36a8dc93fce6866988efc16808235653c6bcac8bf2" dependencies = [ "bytemuck", "byteorder", @@ -4763,9 +4949,9 @@ dependencies = [ [[package]] name = "rstar" -version = "0.12.0" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"133315eb94c7b1e8d0cb097e5a710d850263372fd028fff18969de708afc7008" +checksum = "421400d13ccfd26dfa5858199c30a5d76f9c54e0dba7575273025b43c5175dbb" dependencies = [ "heapless", "num-traits", @@ -4831,9 +5017,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.11" +version = "0.23.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4828ea528154ae444e5a642dbb7d5623354030dc9822b83fd9bb79683c7399d0" +checksum = "5065c3f250cbd332cd894be57c40fa52387247659b14a2d6041d121547903b1b" dependencies = [ "log", "once_cell", @@ -4846,25 +5032,24 @@ dependencies = [ [[package]] name = "rustls-pemfile" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" dependencies = [ - "base64 0.22.1", "rustls-pki-types", ] [[package]] name = "rustls-pki-types" -version = "1.7.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" +checksum = "d2bf47e6ff922db3825eb750c4e2ff784c6ff8fb9e13046ef6a1d1c5401b0b37" [[package]] name = "rustls-webpki" -version = "0.102.5" +version = "0.102.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a6fccd794a42c2c105b513a2f62bc3fd8f3ba57a4593677ceb0bd035164d78" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" dependencies = [ "ring", "rustls-pki-types", @@ -4873,9 +5058,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.17" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" [[package]] name = "ryu" @@ -4916,15 +5101,15 @@ checksum = 
"1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "segment" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bdca318192c89bb31bffa2ef8e9e9898bc80f15a78db2fdd41cd051f1b41d01" +checksum = "1dd0f21b6eb87a45a7cce06075a29ccdb42658a6eb84bf40c8fc179479630609" dependencies = [ "async-trait", "reqwest", "serde", "serde_json", - "thiserror", + "thiserror 2.0.9", "time", ] @@ -4945,9 +5130,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.214" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" dependencies = [ "serde_derive", ] @@ -4963,9 +5148,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.214" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", @@ -4974,9 +5159,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.133" +version = "1.0.135" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" +checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" dependencies = [ "indexmap", "itoa", @@ -4996,9 +5181,9 @@ dependencies = [ [[package]] name = "serde_spanned" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0" +checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" dependencies = [ 
"serde", ] @@ -5098,7 +5283,7 @@ checksum = "adc4e5204eb1910f40f9cfa375f6f05b68c3abac4b6fd879c8ff5e7ae8a0a085" dependencies = [ "num-bigint", "num-traits", - "thiserror", + "thiserror 1.0.69", "time", ] @@ -5314,6 +5499,9 @@ name = "sync_wrapper" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] [[package]] name = "synchronoise" @@ -5345,21 +5533,20 @@ dependencies = [ "byteorder", "enum-as-inner", "libc", - "thiserror", + "thiserror 1.0.69", "walkdir", ] [[package]] name = "sysinfo" -version = "0.30.13" +version = "0.33.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3" +checksum = "4fc858248ea01b66f19d8e8a6d55f41deaf91e9d495246fd01368d99935c6c01" dependencies = [ - "cfg-if", "core-foundation-sys", "libc", + "memchr", "ntapi", - "once_cell", "rayon", "windows", ] @@ -5372,9 +5559,9 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "tar" -version = "0.4.41" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb797dad5fb5b76fcf519e702f4a589483b5ef06567f160c392832c1f5e44909" +checksum = "c65998313f8e17d0d553d28f91a0df93e4dbbbf770279c7bc21ca0f09ea1a1f6" dependencies = [ "filetime", "libc", @@ -5392,12 +5579,13 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.14.0" +version = "3.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" +checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704" dependencies = [ "cfg-if", "fastrand", + "getrandom", "once_cell", "rustix", "windows-sys 0.52.0", @@ -5423,18 +5611,38 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.69" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f072643fd0190df67a8bab670c20ef5d8737177d6ac6b2e9a236cb096206b2cc" +dependencies = [ + "thiserror-impl 2.0.9", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b50fa271071aae2e6ee85f842e2e28ba8cd2c5fb67f11fcb1fd70b276f9e7d4" dependencies = [ "proc-macro2", "quote", @@ -5453,24 +5661,25 @@ dependencies = [ [[package]] name = "tiktoken-rs" -version = "0.5.9" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c314e7ce51440f9e8f5a497394682a57b7c323d0f4d0a6b1b13c429056e0e234" +checksum = "44075987ee2486402f0808505dd65692163d243a337fc54363d49afac41087f6" dependencies = [ "anyhow", "base64 0.21.7", "bstr", - "fancy-regex 0.12.0", + "fancy-regex", "lazy_static", "parking_lot", + "regex", "rustc-hash 1.1.0", ] [[package]] name = "time" -version = "0.3.36" +version = "0.3.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" dependencies = [ "deranged", "itoa", @@ -5491,9 +5700,9 @@ checksum = 
"ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" dependencies = [ "num-conv", "time-core", @@ -5508,6 +5717,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -5557,7 +5776,7 @@ dependencies = [ "serde", "serde_json", "spm_precompiled", - "thiserror", + "thiserror 1.0.69", "unicode-normalization-alignments", "unicode-segmentation", "unicode_categories", @@ -5565,28 +5784,27 @@ dependencies = [ [[package]] name = "tokio" -version = "1.38.0" +version = "1.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551" dependencies = [ "backtrace", "bytes", "libc", - "mio", - "num_cpus", + "mio 1.0.3", "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2 0.5.5", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", @@ -5619,21 +5837,21 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.14" +version = "0.8.19" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f49eb2ab21d2f26bd6db7bf383edc527a7ebaee412d17af4d40fdccd442f335" +checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit 0.22.15", + "toml_edit 0.22.22", ] [[package]] name = "toml_datetime" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" dependencies = [ "serde", ] @@ -5651,27 +5869,27 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.22.15" +version = "0.22.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59a3a72298453f564e2b111fa896f8d07fabb36f51f06d7e875fc5e0b5a3ef1" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" dependencies = [ "indexmap", "serde", "serde_spanned", "toml_datetime", - "winnow 0.6.13", + "winnow 0.6.22", ] [[package]] name = "tower" -version = "0.4.13" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", - "pin-project", "pin-project-lite", + "sync_wrapper", "tokio", "tower-layer", "tower-service", @@ -5679,21 +5897,21 @@ dependencies = [ [[package]] name = "tower-layer" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "log", "pin-project-lite", @@ -5703,9 +5921,9 @@ dependencies = [ [[package]] name = "tracing-actix-web" -version = "0.7.11" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ee9e39a66d9b615644893ffc1704d2a89b5b315b7fd0228ad3182ca9a306b19" +checksum = "54a9f5c1aca50ebebf074ee665b9f99f2e84906dcf6b993a0d0090edb835166d" dependencies = [ "actix-web", "mutually_exclusive_features", @@ -5716,9 +5934,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", @@ -5727,9 +5945,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", "valuable", @@ -5737,9 +5955,9 @@ dependencies = [ [[package]] name = "tracing-error" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e" +checksum = 
"8b1581020d7a273442f5b45074a6a57d5757ad0a47dac0e9f0bd57b81936f3db" dependencies = [ "tracing", "tracing-subscriber", @@ -5758,9 +5976,9 @@ dependencies = [ [[package]] name = "tracing-serde" -version = "0.1.3" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" dependencies = [ "serde", "tracing-core", @@ -5768,9 +5986,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.18" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ "nu-ansi-term", "serde", @@ -5826,13 +6044,40 @@ dependencies = [ "bumpalo", ] +[[package]] +name = "ug" +version = "0.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4eef2ebfc18c67a6dbcacd9d8a4d85e0568cc58c82515552382312c2730ea13" +dependencies = [ + "half 2.4.1", + "num", + "serde", + "serde_json", + "thiserror 1.0.69", +] + +[[package]] +name = "ug-cuda" +version = "0.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c4dcab280ad0ef3957e153a82dcad608c954d02cf253b695322f502d1f8902e" +dependencies = [ + "cudarc", + "half 2.4.1", + "serde", + "serde_json", + "thiserror 1.0.69", + "ug", +] + [[package]] name = "unescaper" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c878a167baa8afd137494101a688ef8c67125089ff2249284bd2b5f9bfedb815" dependencies = [ - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -5844,12 +6089,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "unicode-bidi" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" - [[package]] name = "unicode-blocks" version = "0.1.9" @@ -5906,9 +6145,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" -version = "2.10.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72139d247e5f97a3eff96229a7ae85ead5328a39efe76f8bf5a06313d505b6ea" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" dependencies = [ "base64 0.22.1", "flate2", @@ -5925,9 +6164,9 @@ dependencies = [ [[package]] name = "url" -version = "2.5.2" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", "idna", @@ -5941,12 +6180,24 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + [[package]] name = "utf8-width" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.1" @@ -5954,10 +6205,47 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] -name = "uuid" -version = "1.10.0" +name = "utoipa" +version = 
"5.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" +checksum = "435c6f69ef38c9017b4b4eea965dfb91e71e53d869e896db40d1cf2441dd75c0" +dependencies = [ + "indexmap", + "serde", + "serde_json", + "utoipa-gen", +] + +[[package]] +name = "utoipa-gen" +version = "5.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a77d306bc75294fd52f3e99b13ece67c02c1a2789190a6f31d32f736624326f7" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "syn 2.0.87", + "uuid", +] + +[[package]] +name = "utoipa-scalar" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "088e93bf19f6bd06e0aacb02ca432b3c5a449c4aec2e4aa9fc333a667f2b2c55" +dependencies = [ + "actix-web", + "serde", + "serde_json", + "utoipa", +] + +[[package]] +name = "uuid" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" dependencies = [ "getrandom", "serde", @@ -5977,24 +6265,24 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "vergen" -version = "9.0.0" +version = "9.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c32e7318e93a9ac53693b6caccfb05ff22e04a44c7cf8a279051f24c09da286f" +checksum = "31f25fc8f8f05df455c7941e87f093ad22522a9ff33d7a027774815acf6f0639" dependencies = [ "anyhow", - "derive_builder 0.20.0", + "derive_builder 0.20.2", "rustversion", "vergen-lib", ] [[package]] name = "vergen-git2" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62c52cd2b2b8b7ec75fc20111b3022ac3ff83e4fc14b9497cfcfd39c54f9c67" +checksum = "5e63e069d8749fead1e3bab7a9d79e8fb90516b2ec66fc2243a798ecdc1a31d7" dependencies = [ "anyhow", - "derive_builder 0.20.0", + "derive_builder 0.20.2", "git2", 
"rustversion", "time", @@ -6004,13 +6292,12 @@ dependencies = [ [[package]] name = "vergen-lib" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e06bee42361e43b60f363bad49d63798d0f42fb1768091812270eca00c784720" +checksum = "c0c767e6751c09fc85cde58722cf2f1007e80e4c8d5a4321fc90d83dc54ca147" dependencies = [ "anyhow", - "derive_builder 0.20.0", - "getset", + "derive_builder 0.20.2", "rustversion", ] @@ -6197,21 +6484,85 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows" -version = "0.52.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143" dependencies = [ "windows-core", - "windows-targets 0.52.4", + "windows-targets 0.52.6", ] [[package]] name = "windows-core" -version = "0.52.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d" dependencies = [ - "windows-targets 0.52.4", + "windows-implement", + "windows-interface", + "windows-result 0.1.2", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-implement" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "windows-interface" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "windows-registry" +version = 
"0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result 0.2.0", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result 0.2.0", + "windows-targets 0.52.6", ] [[package]] @@ -6238,7 +6589,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.4", + "windows-targets 0.52.6", ] [[package]] @@ -6273,17 +6624,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.4", - "windows_aarch64_msvc 0.52.4", - "windows_i686_gnu 0.52.4", - "windows_i686_msvc 0.52.4", - "windows_x86_64_gnu 0.52.4", - "windows_x86_64_gnullvm 0.52.4", - "windows_x86_64_msvc 0.52.4", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", 
+ "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -6300,9 +6652,9 @@ checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -6318,9 +6670,9 @@ checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" [[package]] name = "windows_aarch64_msvc" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ -6336,9 +6688,15 @@ checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" [[package]] name = "windows_i686_gnu" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -6354,9 +6712,9 @@ checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" [[package]] name = "windows_i686_msvc" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -6372,9 +6730,9 @@ checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" [[package]] name = "windows_x86_64_gnu" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -6390,9 +6748,9 @@ checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -6408,9 +6766,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "windows_x86_64_msvc" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" @@ -6423,35 +6781,25 @@ dependencies = [ [[package]] name = "winnow" -version = "0.6.13" +version = "0.6.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59b5e5f6c299a3c7890b876a2a587f3115162487e704907d9b6cd29473052ba1" +checksum = "39281189af81c07ec09db316b302a3e67bf9bd7cbf6c820b50e35fee9c2fa980" dependencies = [ "memchr", ] -[[package]] -name = "winreg" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - [[package]] name = "wiremock" -version = "0.6.0" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec874e1eef0df2dcac546057fe5e29186f09c378181cd7b635b4b7bcc98e9d81" +checksum = "7fff469918e7ca034884c7fd8f93fe27bacb7fcb599fd879df6c7b429a29b646" dependencies = [ "assert-json-diff", "async-trait", - "base64 0.21.7", + "base64 0.22.1", "deadpool", "futures", - "http 1.1.0", + "http 1.2.0", "http-body-util", "hyper", "hyper-util", @@ -6464,6 +6812,18 @@ dependencies = [ "url", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "wyz" version = "0.5.1" @@ -6486,7 +6846,7 @@ dependencies = [ [[package]] name = "xtask" -version = "1.12.0" +version = "1.12.2" dependencies = [ "anyhow", "build-info", @@ -6521,14 +6881,14 @@ checksum = "b0144f1a16a199846cb21024da74edd930b43443463292f536b7110b4855b5c6" dependencies = [ "form_urlencoded", "serde", - "thiserror", + "thiserror 1.0.69", ] [[package]] name = "yoke" -version = "0.7.3" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e71b2e4f287f467794c671e2b8f8a5f3716b3c829079a1c44740148eff07e4" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" dependencies = [ "serde", "stable_deref_trait", @@ -6538,9 +6898,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.7.3" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9e6936f0cce458098a201c245a11bef556c6a0181129c7034d10d76d1ec3a2b8" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", @@ -6609,6 +6969,28 @@ dependencies = [ "syn 2.0.87", ] +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "zip" version = "1.1.4" @@ -6621,14 +7003,14 @@ dependencies = [ "displaydoc", "indexmap", "num_enum", - "thiserror", + "thiserror 1.0.69", ] [[package]] name = "zip" -version = "2.1.3" +version = "2.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "775a2b471036342aa69bc5a602bc889cb0a06cda00477d0c69566757d5553d39" +checksum = "ae9c1ea7b3a5e1f4b922ff856a129881167511563dc219869afe3787fc0c1a45" dependencies = [ "aes", "arbitrary", @@ -6646,7 +7028,7 @@ dependencies = [ "pbkdf2", "rand", "sha1", - "thiserror", + "thiserror 2.0.9", "time", "zeroize", "zopfli", diff --git a/Cargo.toml b/Cargo.toml index 89a17d8fc..6a6610b15 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ members = [ ] [workspace.package] -version = "1.12.0" +version = "1.12.2" authors = [ "Quentin de Quelen ", "Clément Renault ", diff --git a/Dockerfile b/Dockerfile index 04557df59..ce4b3bfd8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Compile -FROM rust:1.79.0-alpine3.20 AS compiler +FROM rust:1.81.0-alpine3.20 AS compiler RUN apk add -q --no-cache build-base openssl-dev diff --git a/LICENSE b/LICENSE index 76a573977..686f24931 100644 --- a/LICENSE +++ b/LICENSE @@ 
-1,6 +1,6 @@ MIT License -Copyright (c) 2019-2024 Meili SAS +Copyright (c) 2019-2025 Meili SAS Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 4be92d439..42062781a 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ See the list of all our example apps in our [demos repository](https://github.co - **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants - **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets - **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs +- **AI-ready:** works out of the box with [langchain](https://www.meilisearch.com/with/langchain) and the [model context protocol](https://github.com/meilisearch/meilisearch-mcp) - **Easy to install, deploy, and maintain** ## 📖 Documentation diff --git a/crates/benchmarks/Cargo.toml b/crates/benchmarks/Cargo.toml index ccd256546..a2cddd554 100644 --- a/crates/benchmarks/Cargo.toml +++ b/crates/benchmarks/Cargo.toml @@ -11,27 +11,27 @@ edition.workspace = true license.workspace = true [dependencies] -anyhow = "1.0.86" +anyhow = "1.0.95" bumpalo = "3.16.0" -csv = "1.3.0" +csv = "1.3.1" memmap2 = "0.9.5" milli = { path = "../milli" } mimalloc = { version = "0.1.43", default-features = false } -serde_json = { version = "1.0.120", features = ["preserve_order"] } -tempfile = "3.14.0" +serde_json = { version = "1.0.135", features = ["preserve_order"] } +tempfile = "3.15.0" [dev-dependencies] criterion = { version = "0.5.1", features = 
["html_reports"] } rand = "0.8.5" rand_chacha = "0.3.1" -roaring = "0.10.7" +roaring = "0.10.10" [build-dependencies] -anyhow = "1.0.86" -bytes = "1.6.0" +anyhow = "1.0.95" +bytes = "1.9.0" convert_case = "0.6.0" -flate2 = "1.0.30" -reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"], default-features = false } +flate2 = "1.0.35" +reqwest = { version = "0.12.12", features = ["blocking", "rustls-tls"], default-features = false } [features] default = ["milli/all-tokenizations"] diff --git a/crates/build-info/Cargo.toml b/crates/build-info/Cargo.toml index c24dffe5c..f8ede756e 100644 --- a/crates/build-info/Cargo.toml +++ b/crates/build-info/Cargo.toml @@ -11,8 +11,8 @@ license.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -time = { version = "0.3.36", features = ["parsing"] } +time = { version = "0.3.37", features = ["parsing"] } [build-dependencies] -anyhow = "1.0.86" -vergen-git2 = "1.0.0" +anyhow = "1.0.95" +vergen-git2 = "1.0.2" diff --git a/crates/dump/Cargo.toml b/crates/dump/Cargo.toml index 679a97b4e..5c427916c 100644 --- a/crates/dump/Cargo.toml +++ b/crates/dump/Cargo.toml @@ -11,21 +11,21 @@ readme.workspace = true license.workspace = true [dependencies] -anyhow = "1.0.86" -flate2 = "1.0.30" -http = "1.1.0" +anyhow = "1.0.95" +flate2 = "1.0.35" +http = "1.2.0" meilisearch-types = { path = "../meilisearch-types" } -once_cell = "1.19.0" -regex = "1.10.5" -roaring = { version = "0.10.7", features = ["serde"] } -serde = { version = "1.0.204", features = ["derive"] } -serde_json = { version = "1.0.120", features = ["preserve_order"] } -tar = "0.4.41" -tempfile = "3.10.1" -thiserror = "1.0.61" -time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] } -tracing = "0.1.40" -uuid = { version = "1.10.0", features = ["serde", "v4"] } +once_cell = "1.20.2" +regex = "1.11.1" +roaring = { version = "0.10.10", features = ["serde"] } 
+serde = { version = "1.0.217", features = ["derive"] } +serde_json = { version = "1.0.135", features = ["preserve_order"] } +tar = "0.4.43" +tempfile = "3.15.0" +thiserror = "2.0.9" +time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] } +tracing = "0.1.41" +uuid = { version = "1.11.0", features = ["serde", "v4"] } [dev-dependencies] big_s = "1.0.2" diff --git a/crates/file-store/Cargo.toml b/crates/file-store/Cargo.toml index 08b7bb717..66ea65336 100644 --- a/crates/file-store/Cargo.toml +++ b/crates/file-store/Cargo.toml @@ -11,7 +11,7 @@ edition.workspace = true license.workspace = true [dependencies] -tempfile = "3.10.1" -thiserror = "1.0.61" -tracing = "0.1.40" -uuid = { version = "1.10.0", features = ["serde", "v4"] } +tempfile = "3.15.0" +thiserror = "2.0.9" +tracing = "0.1.41" +uuid = { version = "1.11.0", features = ["serde", "v4"] } diff --git a/crates/filter-parser/Cargo.toml b/crates/filter-parser/Cargo.toml index 6725a35d1..2657315a4 100644 --- a/crates/filter-parser/Cargo.toml +++ b/crates/filter-parser/Cargo.toml @@ -17,4 +17,5 @@ nom_locate = "4.2.0" unescaper = "0.1.5" [dev-dependencies] -insta = "1.39.0" +# fixed version due to format breakages in v1.40 +insta = "=1.39.0" diff --git a/crates/fuzzers/Cargo.toml b/crates/fuzzers/Cargo.toml index 86a0f779d..a838350ba 100644 --- a/crates/fuzzers/Cargo.toml +++ b/crates/fuzzers/Cargo.toml @@ -11,12 +11,12 @@ edition.workspace = true license.workspace = true [dependencies] -arbitrary = { version = "1.3.2", features = ["derive"] } +arbitrary = { version = "1.4.1", features = ["derive"] } bumpalo = "3.16.0" -clap = { version = "4.5.9", features = ["derive"] } +clap = { version = "4.5.24", features = ["derive"] } either = "1.13.0" -fastrand = "2.1.0" +fastrand = "2.3.0" milli = { path = "../milli" } -serde = { version = "1.0.204", features = ["derive"] } -serde_json = { version = "1.0.120", features = ["preserve_order"] } -tempfile = "3.10.1" +serde = { version = 
"1.0.217", features = ["derive"] } +serde_json = { version = "1.0.135", features = ["preserve_order"] } +tempfile = "3.15.0" diff --git a/crates/index-scheduler/Cargo.toml b/crates/index-scheduler/Cargo.toml index ec2f17f84..69edace77 100644 --- a/crates/index-scheduler/Cargo.toml +++ b/crates/index-scheduler/Cargo.toml @@ -11,42 +11,43 @@ edition.workspace = true license.workspace = true [dependencies] -anyhow = "1.0.86" +anyhow = "1.0.95" bincode = "1.3.3" bumpalo = "3.16.0" -bumparaw-collections = "0.1.2" +bumparaw-collections = "0.1.4" convert_case = "0.6.0" -csv = "1.3.0" -derive_builder = "0.20.0" +csv = "1.3.1" +derive_builder = "0.20.2" dump = { path = "../dump" } enum-iterator = "2.1.0" file-store = { path = "../file-store" } -flate2 = "1.0.30" +flate2 = "1.0.35" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } -memmap2 = "0.9.4" +memmap2 = "0.9.5" page_size = "0.6.0" rayon = "1.10.0" -roaring = { version = "0.10.7", features = ["serde"] } -serde = { version = "1.0.204", features = ["derive"] } -serde_json = { version = "1.0.120", features = ["preserve_order"] } +roaring = { version = "0.10.10", features = ["serde"] } +serde = { version = "1.0.217", features = ["derive"] } +serde_json = { version = "1.0.135", features = ["preserve_order"] } synchronoise = "1.0.1" -tempfile = "3.10.1" -thiserror = "1.0.61" -time = { version = "0.3.36", features = [ +tempfile = "3.15.0" +thiserror = "2.0.9" +time = { version = "0.3.37", features = [ "serde-well-known", "formatting", "parsing", "macros", ] } -tracing = "0.1.40" -ureq = "2.10.0" -uuid = { version = "1.10.0", features = ["serde", "v4"] } +tracing = "0.1.41" +ureq = "2.12.1" +uuid = { version = "1.11.0", features = ["serde", "v4"] } [dev-dependencies] arroy = "0.5.0" big_s = "1.0.2" -crossbeam-channel = "0.5.13" -insta = { version = "1.39.0", features = ["json", "redactions"] } +crossbeam-channel = "0.5.14" +# fixed version due to format breakages in v1.40 
+insta = { version = "=1.39.0", features = ["json", "redactions"] } maplit = "1.0.2" meili-snap = { path = "../meili-snap" } diff --git a/crates/index-scheduler/src/autobatcher.rs b/crates/index-scheduler/src/autobatcher.rs deleted file mode 100644 index 5950e2b13..000000000 --- a/crates/index-scheduler/src/autobatcher.rs +++ /dev/null @@ -1,901 +0,0 @@ -/*! -The autobatcher is responsible for combining the next enqueued -tasks affecting a single index into a [batch](crate::batch::Batch). - -The main function of the autobatcher is [`next_autobatch`]. -*/ - -use std::ops::ControlFlow::{self, Break, Continue}; - -use meilisearch_types::milli::update::IndexDocumentsMethod::{ - self, ReplaceDocuments, UpdateDocuments, -}; -use meilisearch_types::tasks::TaskId; - -use crate::KindWithContent; - -/// Succinctly describes a task's [`Kind`](meilisearch_types::tasks::Kind) -/// for the purpose of simplifying the implementation of the autobatcher. -/// -/// Only the non-prioritised tasks that can be grouped in a batch have a corresponding [`AutobatchKind`] -enum AutobatchKind { - DocumentImport { - method: IndexDocumentsMethod, - allow_index_creation: bool, - primary_key: Option, - }, - DocumentEdition, - DocumentDeletion { - by_filter: bool, - }, - DocumentClear, - Settings { - allow_index_creation: bool, - }, - IndexCreation, - IndexDeletion, - IndexUpdate, - IndexSwap, -} - -impl AutobatchKind { - #[rustfmt::skip] - fn allow_index_creation(&self) -> Option { - match self { - AutobatchKind::DocumentImport { allow_index_creation, .. } - | AutobatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation), - _ => None, - } - } - - fn primary_key(&self) -> Option> { - match self { - AutobatchKind::DocumentImport { primary_key, .. 
} => Some(primary_key.as_deref()), - _ => None, - } - } -} - -impl From for AutobatchKind { - fn from(kind: KindWithContent) -> Self { - match kind { - KindWithContent::DocumentAdditionOrUpdate { - method, - allow_index_creation, - primary_key, - .. - } => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key }, - KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition, - KindWithContent::DocumentDeletion { .. } => { - AutobatchKind::DocumentDeletion { by_filter: false } - } - KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear, - KindWithContent::DocumentDeletionByFilter { .. } => { - AutobatchKind::DocumentDeletion { by_filter: true } - } - KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => { - AutobatchKind::Settings { - allow_index_creation: allow_index_creation && !is_deletion, - } - } - KindWithContent::IndexDeletion { .. } => AutobatchKind::IndexDeletion, - KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation, - KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate, - KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap, - KindWithContent::TaskCancelation { .. } - | KindWithContent::TaskDeletion { .. } - | KindWithContent::DumpCreation { .. 
} - | KindWithContent::SnapshotCreation => { - panic!("The autobatcher should never be called with tasks that don't apply to an index.") - } - } - } -} - -#[derive(Debug)] -pub enum BatchKind { - DocumentClear { - ids: Vec, - }, - DocumentOperation { - method: IndexDocumentsMethod, - allow_index_creation: bool, - primary_key: Option, - operation_ids: Vec, - }, - DocumentEdition { - id: TaskId, - }, - DocumentDeletion { - deletion_ids: Vec, - includes_by_filter: bool, - }, - ClearAndSettings { - other: Vec, - allow_index_creation: bool, - settings_ids: Vec, - }, - Settings { - allow_index_creation: bool, - settings_ids: Vec, - }, - IndexDeletion { - ids: Vec, - }, - IndexCreation { - id: TaskId, - }, - IndexUpdate { - id: TaskId, - }, - IndexSwap { - id: TaskId, - }, -} - -impl BatchKind { - #[rustfmt::skip] - fn allow_index_creation(&self) -> Option { - match self { - BatchKind::DocumentOperation { allow_index_creation, .. } - | BatchKind::ClearAndSettings { allow_index_creation, .. } - | BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation), - _ => None, - } - } - - fn primary_key(&self) -> Option> { - match self { - BatchKind::DocumentOperation { primary_key, .. } => Some(primary_key.as_deref()), - _ => None, - } - } -} - -impl BatchKind { - /// Returns a `ControlFlow::Break` if you must stop right now. - /// The boolean tell you if an index has been created by the batched task. - /// To ease the writing of the code. `true` can be returned when you don't need to create an index - /// but false can't be returned if you needs to create an index. 
- // TODO use an AutoBatchKind as input - pub fn new( - task_id: TaskId, - kind: KindWithContent, - primary_key: Option<&str>, - ) -> (ControlFlow, bool) { - use AutobatchKind as K; - - match AutobatchKind::from(kind) { - K::IndexCreation => (Break(BatchKind::IndexCreation { id: task_id }), true), - K::IndexDeletion => (Break(BatchKind::IndexDeletion { ids: vec![task_id] }), false), - K::IndexUpdate => (Break(BatchKind::IndexUpdate { id: task_id }), false), - K::IndexSwap => (Break(BatchKind::IndexSwap { id: task_id }), false), - K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false), - K::DocumentImport { method, allow_index_creation, primary_key: pk } - if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() => - { - ( - Continue(BatchKind::DocumentOperation { - method, - allow_index_creation, - primary_key: pk, - operation_ids: vec![task_id], - }), - allow_index_creation, - ) - } - // if the primary key set in the task was different than ours we should stop and make this batch fail asap. - K::DocumentImport { method, allow_index_creation, primary_key } => ( - Break(BatchKind::DocumentOperation { - method, - allow_index_creation, - primary_key, - operation_ids: vec![task_id], - }), - allow_index_creation, - ), - K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false), - K::DocumentDeletion { by_filter: includes_by_filter } => ( - Continue(BatchKind::DocumentDeletion { - deletion_ids: vec![task_id], - includes_by_filter, - }), - false, - ), - K::Settings { allow_index_creation } => ( - Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }), - allow_index_creation, - ), - } - } - - /// Returns a `ControlFlow::Break` if you must stop right now. - /// The boolean tell you if an index has been created by the batched task. - /// To ease the writing of the code. 
`true` can be returned when you don't need to create an index - /// but false can't be returned if you needs to create an index. - #[rustfmt::skip] - fn accumulate(self, id: TaskId, kind: AutobatchKind, index_already_exists: bool, primary_key: Option<&str>) -> ControlFlow { - use AutobatchKind as K; - - match (self, kind) { - // We don't batch any of these operations - (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break(this), - // We must not batch tasks that don't have the same index creation rights if the index doesn't already exists. - (this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => { - Break(this) - }, - // NOTE: We need to negate the whole condition since we're checking if we need to break instead of continue. - // I wrote it this way because it's easier to understand than the other way around. - (this, kind) if !( - // 1. If both task don't interact with primary key -> we can continue - (this.primary_key().is_none() && kind.primary_key().is_none()) || - // 2. 
Else -> - ( - // 2.1 If we already have a primary-key -> - ( - primary_key.is_some() && - // 2.1.1 If the task we're trying to accumulate have a pk it must be equal to our primary key - // 2.1.2 If the task don't have a primary-key -> we can continue - kind.primary_key().map_or(true, |pk| pk == primary_key) - ) || - // 2.2 If we don't have a primary-key -> - ( - // 2.2.1 If both the batch and the task have a primary key they should be equal - // 2.2.2 If the batch is set to Some(None), the task should be too - // 2.2.3 If the batch is set to None -> we can continue - this.primary_key().zip(kind.primary_key()).map_or(true, |(this, kind)| this == kind) - ) - ) - - ) // closing the negation - - => { - Break(this) - }, - // The index deletion can batch with everything but must stop after - ( - BatchKind::DocumentClear { mut ids } - | BatchKind::DocumentDeletion { deletion_ids: mut ids, includes_by_filter: _ } - | BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, operation_ids: mut ids } - | BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids }, - K::IndexDeletion, - ) => { - ids.push(id); - Break(BatchKind::IndexDeletion { ids }) - } - ( - BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other }, - K::IndexDeletion, - ) => { - ids.push(id); - ids.append(&mut other); - Break(BatchKind::IndexDeletion { ids }) - } - - ( - BatchKind::DocumentClear { mut ids }, - K::DocumentClear | K::DocumentDeletion { by_filter: _ }, - ) => { - ids.push(id); - Continue(BatchKind::DocumentClear { ids }) - } - ( - this @ BatchKind::DocumentClear { .. }, - K::DocumentImport { .. } | K::Settings { .. 
}, - ) => Break(this), - ( - BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, mut operation_ids }, - K::DocumentClear, - ) => { - operation_ids.push(id); - Continue(BatchKind::DocumentClear { ids: operation_ids }) - } - - // we can autobatch the same kind of document additions / updates - ( - BatchKind::DocumentOperation { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut operation_ids }, - K::DocumentImport { method: ReplaceDocuments, primary_key: pk, .. }, - ) => { - operation_ids.push(id); - Continue(BatchKind::DocumentOperation { - method: ReplaceDocuments, - allow_index_creation, - operation_ids, - primary_key: pk, - }) - } - ( - BatchKind::DocumentOperation { method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids }, - K::DocumentImport { method: UpdateDocuments, primary_key: pk, .. }, - ) => { - operation_ids.push(id); - Continue(BatchKind::DocumentOperation { - method: UpdateDocuments, - allow_index_creation, - primary_key: pk, - operation_ids, - }) - } - ( - BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids }, - K::DocumentDeletion { by_filter: false }, - ) => { - operation_ids.push(id); - - Continue(BatchKind::DocumentOperation { - method, - allow_index_creation, - primary_key, - operation_ids, - }) - } - // We can't batch a document operation with a delete by filter - ( - this @ BatchKind::DocumentOperation { .. }, - K::DocumentDeletion { by_filter: true }, - ) => { - Break(this) - } - // but we can't autobatch documents if it's not the same kind - // this match branch MUST be AFTER the previous one - ( - this @ BatchKind::DocumentOperation { .. }, - K::DocumentImport { .. }, - ) => Break(this), - - ( - this @ BatchKind::DocumentOperation { .. }, - K::Settings { .. 
}, - ) => Break(this), - - (BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => { - deletion_ids.push(id); - Continue(BatchKind::DocumentClear { ids: deletion_ids }) - } - // we can't autobatch the deletion and import if the document deletion contained a filter - ( - this @ BatchKind::DocumentDeletion { deletion_ids: _, includes_by_filter: true }, - K::DocumentImport { .. } - ) => Break(this), - // we can autobatch the deletion and import if the index already exists - ( - BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false }, - K::DocumentImport { method, allow_index_creation, primary_key } - ) if index_already_exists => { - deletion_ids.push(id); - - Continue(BatchKind::DocumentOperation { - method, - allow_index_creation, - primary_key, - operation_ids: deletion_ids, - }) - } - // we can autobatch the deletion and import if both can't create an index - ( - BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false }, - K::DocumentImport { method, allow_index_creation, primary_key } - ) if !allow_index_creation => { - deletion_ids.push(id); - - Continue(BatchKind::DocumentOperation { - method, - allow_index_creation, - primary_key, - operation_ids: deletion_ids, - }) - } - // we can't autobatch a deletion and an import if the index does not exists but would be created by an addition - ( - this @ BatchKind::DocumentDeletion { .. }, - K::DocumentImport { .. } - ) => { - Break(this) - } - (BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter }, K::DocumentDeletion { by_filter }) => { - deletion_ids.push(id); - Continue(BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: includes_by_filter | by_filter }) - } - (this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. 
}) => Break(this), - - ( - BatchKind::Settings { settings_ids, allow_index_creation }, - K::DocumentClear, - ) => Continue(BatchKind::ClearAndSettings { - settings_ids, - allow_index_creation, - other: vec![id], - }), - ( - this @ BatchKind::Settings { .. }, - K::DocumentImport { .. } | K::DocumentDeletion { .. }, - ) => Break(this), - ( - BatchKind::Settings { mut settings_ids, allow_index_creation }, - K::Settings { .. }, - ) => { - settings_ids.push(id); - Continue(BatchKind::Settings { - allow_index_creation, - settings_ids, - }) - } - - ( - BatchKind::ClearAndSettings { mut other, settings_ids, allow_index_creation }, - K::DocumentClear, - ) => { - other.push(id); - Continue(BatchKind::ClearAndSettings { - other, - settings_ids, - allow_index_creation, - }) - } - (this @ BatchKind::ClearAndSettings { .. }, K::DocumentImport { .. }) => Break(this), - ( - BatchKind::ClearAndSettings { - mut other, - settings_ids, - allow_index_creation, - }, - K::DocumentDeletion { .. }, - ) => { - other.push(id); - Continue(BatchKind::ClearAndSettings { - other, - settings_ids, - allow_index_creation, - }) - } - ( - BatchKind::ClearAndSettings { mut settings_ids, other, allow_index_creation }, - K::Settings { .. }, - ) => { - settings_ids.push(id); - Continue(BatchKind::ClearAndSettings { - other, - settings_ids, - allow_index_creation, - }) - } - - ( - BatchKind::IndexCreation { .. } - | BatchKind::IndexDeletion { .. } - | BatchKind::IndexUpdate { .. } - | BatchKind::IndexSwap { .. } - | BatchKind::DocumentEdition { .. }, - _, - ) => { - unreachable!() - } - } - } -} - -/// Create a batch from an ordered list of tasks. -/// -/// ## Preconditions -/// 1. The tasks must be enqueued and given in the order in which they were enqueued -/// 2. The tasks must not be prioritised tasks (e.g. task cancellation, dump, snapshot, task deletion) -/// 3. The tasks must all be related to the same index -/// -/// ## Return -/// `None` if the list of tasks is empty. 
Otherwise, an [`AutoBatch`] that represents -/// a subset of the given tasks. -pub fn autobatch( - enqueued: Vec<(TaskId, KindWithContent)>, - index_already_exists: bool, - primary_key: Option<&str>, -) -> Option<(BatchKind, bool)> { - let mut enqueued = enqueued.into_iter(); - let (id, kind) = enqueued.next()?; - - // index_exist will keep track of if the index should exist at this point after the tasks we batched. - let mut index_exist = index_already_exists; - - let (mut acc, must_create_index) = match BatchKind::new(id, kind, primary_key) { - (Continue(acc), create) => (acc, create), - (Break(acc), create) => return Some((acc, create)), - }; - - // if an index has been created in the previous step we can consider it as existing. - index_exist |= must_create_index; - - for (id, kind) in enqueued { - acc = match acc.accumulate(id, kind.into(), index_exist, primary_key) { - Continue(acc) => acc, - Break(acc) => return Some((acc, must_create_index)), - }; - } - - Some((acc, must_create_index)) -} - -#[cfg(test)] -mod tests { - use meilisearch_types::tasks::IndexSwap; - use uuid::Uuid; - - use super::*; - use crate::debug_snapshot; - - fn autobatch_from( - index_already_exists: bool, - primary_key: Option<&str>, - input: impl IntoIterator, - ) -> Option<(BatchKind, bool)> { - autobatch( - input.into_iter().enumerate().map(|(id, kind)| (id as TaskId, kind)).collect(), - index_already_exists, - primary_key, - ) - } - - fn doc_imp( - method: IndexDocumentsMethod, - allow_index_creation: bool, - primary_key: Option<&str>, - ) -> KindWithContent { - KindWithContent::DocumentAdditionOrUpdate { - index_uid: String::from("doggo"), - primary_key: primary_key.map(|pk| pk.to_string()), - method, - content_file: Uuid::new_v4(), - documents_count: 0, - allow_index_creation, - } - } - - fn doc_del() -> KindWithContent { - KindWithContent::DocumentDeletion { - index_uid: String::from("doggo"), - documents_ids: Vec::new(), - } - } - - fn doc_del_fil() -> KindWithContent { - 
KindWithContent::DocumentDeletionByFilter { - index_uid: String::from("doggo"), - filter_expr: serde_json::json!("cuteness > 100"), - } - } - - fn doc_clr() -> KindWithContent { - KindWithContent::DocumentClear { index_uid: String::from("doggo") } - } - - fn settings(allow_index_creation: bool) -> KindWithContent { - KindWithContent::SettingsUpdate { - index_uid: String::from("doggo"), - new_settings: Default::default(), - is_deletion: false, - allow_index_creation, - } - } - - fn idx_create() -> KindWithContent { - KindWithContent::IndexCreation { index_uid: String::from("doggo"), primary_key: None } - } - - fn idx_update() -> KindWithContent { - KindWithContent::IndexUpdate { index_uid: String::from("doggo"), primary_key: None } - } - - fn idx_del() -> KindWithContent { - KindWithContent::IndexDeletion { index_uid: String::from("doggo") } - } - - fn idx_swap() -> KindWithContent { - KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: (String::from("doggo"), String::from("catto")) }], - } - } - - #[test] - fn autobatch_simple_operation_together() { - // we can autobatch one or multiple `ReplaceDocuments` together. - // if the index exists. 
- debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))"); - - // if it doesn't exists. 
- debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - - // we can autobatch one or multiple `UpdateDocuments` together. - // if the index exists. 
- debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))"); - - // if it doesn't exists. 
- debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))"); - - // we can autobatch one or multiple DocumentDeletion together - debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))"); - - // we can autobatch one or multiple DocumentDeletionByFilter together - debug_snapshot!(autobatch_from(true, None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - 
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))"); - - // we can autobatch one or multiple Settings together - debug_snapshot!(autobatch_from(true, None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))"); - debug_snapshot!(autobatch_from(true, None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))"); - - debug_snapshot!(autobatch_from(false,None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(false,None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))"); - debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))"); - - // We can autobatch document addition with document deletion - debug_snapshot!(autobatch_from(true, None, 
[doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - debug_snapshot!(autobatch_from(false, None, 
[doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - // And the other way around - 
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - 
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - - // But we can't autobatch document addition with document deletion by filter - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, 
operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, 
operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); - // And the other way around - debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { 
deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - } - - #[test] - fn simple_document_operation_dont_autobatch_with_other() { - // addition, updates and deletion by filter can't batch together - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, 
allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, 
[doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - } - - #[test] - fn document_addition_doesnt_batch_with_settings() { - // simple case - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - - // multiple settings and doc addition - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, 
true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - - // addition and setting unordered - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - - // Doesn't batch with other forbidden operations - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, 
[doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - } - - #[test] - fn clear_and_additions() { - // these two doesn't need to batch - debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(UpdateDocuments, 
true, None)]), @"Some((DocumentClear { ids: [0] }, false))"); - - // Basic use case - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); - - // This batch kind doesn't mix with other document addition - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); - - // But you can batch multiple clear together - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))"); - } - - #[test] - fn clear_and_additions_and_settings() { - // A clear don't need to autobatch the settings that happens AFTER there is no documents - debug_snapshot!(autobatch_from(true, None, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))"); - - debug_snapshot!(autobatch_from(true, None, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, 
true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - } - - #[test] - fn anything_and_index_deletion() { - // The `IndexDeletion` doesn't batch with anything that happens AFTER. - debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))"); - - debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: 
[0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))"); - - // The index deletion can accept almost any type of `BatchKind` and transform it to an `IndexDeletion`. 
- // First, the basic cases - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); - - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false,None, 
[doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); - } - - #[test] - fn allowed_and_disallowed_index_creation() { - // `DocumentImport` can't be mixed with those disallowed to do so except if the index already exists. - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, 
true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - - // batch deletion and addition - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); - } - - #[test] - fn autobatch_primary_key() { - // ==> 
If I have a pk - // With a single update - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); - - // With a multiple updates - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, Some("id"), 
[doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), 
@r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); - - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: 
Some("other"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); - - // ==> If I don't have a pk - // With a single update - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); - - // With a multiple updates - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); - } -} 
diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs deleted file mode 100644 index a8b67436e..000000000 --- a/crates/index-scheduler/src/batch.rs +++ /dev/null @@ -1,1950 +0,0 @@ -/*! -This module handles the creation and processing of batch operations. - -A batch is a combination of multiple tasks that can be processed at once. -Executing a batch operation should always be functionally equivalent to -executing each of its tasks' operations individually and in order. - -For example, if the user sends two tasks: -1. import documents X -2. import documents Y - -We can combine the two tasks in a single batch: -1. import documents X and Y - -Processing this batch is functionally equivalent to processing the two -tasks individually, but should be much faster since we are only performing -one indexing operation. -*/ - -use std::collections::{BTreeSet, HashMap, HashSet}; -use std::ffi::OsStr; -use std::fmt; -use std::fs::{self, File}; -use std::io::BufWriter; -use std::sync::atomic::Ordering; - -use bumpalo::collections::CollectIn; -use bumpalo::Bump; -use dump::IndexMetadata; -use meilisearch_types::batches::BatchId; -use meilisearch_types::heed::{RoTxn, RwTxn}; -use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME; -use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey}; -use meilisearch_types::milli::heed::CompactionOption; -use meilisearch_types::milli::progress::Progress; -use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction}; -use meilisearch_types::milli::update::{ - DocumentAdditionResult, IndexDocumentsMethod, Settings as MilliSettings, -}; -use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; -use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder}; -use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; -use meilisearch_types::tasks::{Details, IndexSwap, Kind, 
KindWithContent, Status, Task}; -use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; -use roaring::RoaringBitmap; -use time::macros::format_description; -use time::OffsetDateTime; -use uuid::Uuid; - -use crate::autobatcher::{self, BatchKind}; -use crate::processing::{ - AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, AtomicUpdateFileStep, CreateIndexProgress, - DeleteIndexProgress, DocumentDeletionProgress, DocumentEditionProgress, - DocumentOperationProgress, DumpCreationProgress, InnerSwappingTwoIndexes, SettingsProgress, - SnapshotCreationProgress, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress, - UpdateIndexProgress, VariableNameStep, -}; -use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch}; -use crate::{Error, IndexScheduler, Result, TaskId}; - -/// Represents a combination of tasks that can all be processed at the same time. -/// -/// A batch contains the set of tasks that it represents (accessible through -/// [`self.ids()`](Batch::ids)), as well as additional information on how to -/// be processed. -#[derive(Debug)] -pub(crate) enum Batch { - TaskCancelation { - /// The task cancelation itself. - task: Task, - }, - TaskDeletions(Vec), - SnapshotCreation(Vec), - Dump(Task), - IndexOperation { - op: IndexOperation, - must_create_index: bool, - }, - IndexCreation { - index_uid: String, - primary_key: Option, - task: Task, - }, - IndexUpdate { - index_uid: String, - primary_key: Option, - task: Task, - }, - IndexDeletion { - index_uid: String, - tasks: Vec, - index_has_been_created: bool, - }, - IndexSwap { - task: Task, - }, -} - -#[derive(Debug)] -pub(crate) enum DocumentOperation { - Add(Uuid), - Delete(Vec), -} - -/// A [batch](Batch) that combines multiple tasks operating on an index. 
-#[derive(Debug)] -pub(crate) enum IndexOperation { - DocumentOperation { - index_uid: String, - primary_key: Option, - method: IndexDocumentsMethod, - operations: Vec, - tasks: Vec, - }, - DocumentEdition { - index_uid: String, - task: Task, - }, - DocumentDeletion { - index_uid: String, - tasks: Vec, - }, - DocumentClear { - index_uid: String, - tasks: Vec, - }, - Settings { - index_uid: String, - // The boolean indicates if it's a settings deletion or creation. - settings: Vec<(bool, Settings)>, - tasks: Vec, - }, - DocumentClearAndSetting { - index_uid: String, - cleared_tasks: Vec, - - // The boolean indicates if it's a settings deletion or creation. - settings: Vec<(bool, Settings)>, - settings_tasks: Vec, - }, -} - -impl Batch { - /// Return the task ids associated with this batch. - pub fn ids(&self) -> RoaringBitmap { - match self { - Batch::TaskCancelation { task, .. } - | Batch::Dump(task) - | Batch::IndexCreation { task, .. } - | Batch::IndexUpdate { task, .. } => { - RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() - } - Batch::SnapshotCreation(tasks) - | Batch::TaskDeletions(tasks) - | Batch::IndexDeletion { tasks, .. } => { - RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid)) - } - Batch::IndexOperation { op, .. } => match op { - IndexOperation::DocumentOperation { tasks, .. } - | IndexOperation::Settings { tasks, .. } - | IndexOperation::DocumentDeletion { tasks, .. } - | IndexOperation::DocumentClear { tasks, .. } => { - RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid)) - } - IndexOperation::DocumentEdition { task, .. } => { - RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() - } - IndexOperation::DocumentClearAndSetting { - cleared_tasks: tasks, - settings_tasks: other, - .. 
- } => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)), - }, - Batch::IndexSwap { task } => { - RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() - } - } - } - - /// Return the index UID associated with this batch - pub fn index_uid(&self) -> Option<&str> { - use Batch::*; - match self { - TaskCancelation { .. } - | TaskDeletions(_) - | SnapshotCreation(_) - | Dump(_) - | IndexSwap { .. } => None, - IndexOperation { op, .. } => Some(op.index_uid()), - IndexCreation { index_uid, .. } - | IndexUpdate { index_uid, .. } - | IndexDeletion { index_uid, .. } => Some(index_uid), - } - } -} - -impl fmt::Display for Batch { - /// A text used when we debug the profiling reports. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let index_uid = self.index_uid(); - let tasks = self.ids(); - match self { - Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?, - Batch::TaskDeletions(_) => f.write_str("TaskDeletion")?, - Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?, - Batch::Dump(_) => f.write_str("Dump")?, - Batch::IndexOperation { op, .. } => write!(f, "{op}")?, - Batch::IndexCreation { .. } => f.write_str("IndexCreation")?, - Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?, - Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?, - Batch::IndexSwap { .. } => f.write_str("IndexSwap")?, - }; - match index_uid { - Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")), - None => f.write_fmt(format_args!(" from tasks: {tasks:?}")), - } - } -} - -impl IndexOperation { - pub fn index_uid(&self) -> &str { - match self { - IndexOperation::DocumentOperation { index_uid, .. } - | IndexOperation::DocumentEdition { index_uid, .. } - | IndexOperation::DocumentDeletion { index_uid, .. } - | IndexOperation::DocumentClear { index_uid, .. } - | IndexOperation::Settings { index_uid, .. } - | IndexOperation::DocumentClearAndSetting { index_uid, .. 
} => index_uid, - } - } -} - -impl fmt::Display for IndexOperation { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - IndexOperation::DocumentOperation { .. } => { - f.write_str("IndexOperation::DocumentOperation") - } - IndexOperation::DocumentEdition { .. } => { - f.write_str("IndexOperation::DocumentEdition") - } - IndexOperation::DocumentDeletion { .. } => { - f.write_str("IndexOperation::DocumentDeletion") - } - IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"), - IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"), - IndexOperation::DocumentClearAndSetting { .. } => { - f.write_str("IndexOperation::DocumentClearAndSetting") - } - } - } -} - -impl IndexScheduler { - /// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`]. - /// - /// ## Arguments - /// - `rtxn`: read transaction - /// - `index_uid`: name of the index affected by the operations of the autobatch - /// - `batch`: the result of the autobatcher - pub(crate) fn create_next_batch_index( - &self, - rtxn: &RoTxn, - index_uid: String, - batch: BatchKind, - current_batch: &mut ProcessingBatch, - must_create_index: bool, - ) -> Result> { - match batch { - BatchKind::DocumentClear { ids } => Ok(Some(Batch::IndexOperation { - op: IndexOperation::DocumentClear { - tasks: self.get_existing_tasks_for_processing_batch( - rtxn, - current_batch, - ids, - )?, - index_uid, - }, - must_create_index, - })), - BatchKind::DocumentEdition { id } => { - let mut task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; - current_batch.processing(Some(&mut task)); - match &task.kind { - KindWithContent::DocumentEdition { index_uid, .. } => { - Ok(Some(Batch::IndexOperation { - op: IndexOperation::DocumentEdition { - index_uid: index_uid.clone(), - task, - }, - must_create_index: false, - })) - } - _ => unreachable!(), - } - } - BatchKind::DocumentOperation { method, operation_ids, .. 
} => { - let tasks = self.get_existing_tasks_for_processing_batch( - rtxn, - current_batch, - operation_ids, - )?; - let primary_key = tasks - .iter() - .find_map(|task| match task.kind { - KindWithContent::DocumentAdditionOrUpdate { ref primary_key, .. } => { - // we want to stop on the first document addition - Some(primary_key.clone()) - } - KindWithContent::DocumentDeletion { .. } => None, - _ => unreachable!(), - }) - .flatten(); - - let mut operations = Vec::new(); - - for task in tasks.iter() { - match task.kind { - KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => { - operations.push(DocumentOperation::Add(content_file)); - } - KindWithContent::DocumentDeletion { ref documents_ids, .. } => { - operations.push(DocumentOperation::Delete(documents_ids.clone())); - } - _ => unreachable!(), - } - } - - Ok(Some(Batch::IndexOperation { - op: IndexOperation::DocumentOperation { - index_uid, - primary_key, - method, - operations, - tasks, - }, - must_create_index, - })) - } - BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: _ } => { - let tasks = self.get_existing_tasks_for_processing_batch( - rtxn, - current_batch, - deletion_ids, - )?; - - Ok(Some(Batch::IndexOperation { - op: IndexOperation::DocumentDeletion { index_uid, tasks }, - must_create_index, - })) - } - BatchKind::Settings { settings_ids, .. } => { - let tasks = self.get_existing_tasks_for_processing_batch( - rtxn, - current_batch, - settings_ids, - )?; - - let mut settings = Vec::new(); - for task in &tasks { - match task.kind { - KindWithContent::SettingsUpdate { - ref new_settings, is_deletion, .. 
- } => settings.push((is_deletion, *new_settings.clone())), - _ => unreachable!(), - } - } - - Ok(Some(Batch::IndexOperation { - op: IndexOperation::Settings { index_uid, settings, tasks }, - must_create_index, - })) - } - BatchKind::ClearAndSettings { other, settings_ids, allow_index_creation } => { - let (index_uid, settings, settings_tasks) = match self - .create_next_batch_index( - rtxn, - index_uid, - BatchKind::Settings { settings_ids, allow_index_creation }, - current_batch, - must_create_index, - )? - .unwrap() - { - Batch::IndexOperation { - op: IndexOperation::Settings { index_uid, settings, tasks, .. }, - .. - } => (index_uid, settings, tasks), - _ => unreachable!(), - }; - let (index_uid, cleared_tasks) = match self - .create_next_batch_index( - rtxn, - index_uid, - BatchKind::DocumentClear { ids: other }, - current_batch, - must_create_index, - )? - .unwrap() - { - Batch::IndexOperation { - op: IndexOperation::DocumentClear { index_uid, tasks }, - .. - } => (index_uid, tasks), - _ => unreachable!(), - }; - - Ok(Some(Batch::IndexOperation { - op: IndexOperation::DocumentClearAndSetting { - index_uid, - cleared_tasks, - settings, - settings_tasks, - }, - must_create_index, - })) - } - BatchKind::IndexCreation { id } => { - let mut task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; - current_batch.processing(Some(&mut task)); - let (index_uid, primary_key) = match &task.kind { - KindWithContent::IndexCreation { index_uid, primary_key } => { - (index_uid.clone(), primary_key.clone()) - } - _ => unreachable!(), - }; - Ok(Some(Batch::IndexCreation { index_uid, primary_key, task })) - } - BatchKind::IndexUpdate { id } => { - let mut task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; - current_batch.processing(Some(&mut task)); - let primary_key = match &task.kind { - KindWithContent::IndexUpdate { primary_key, .. 
} => primary_key.clone(), - _ => unreachable!(), - }; - Ok(Some(Batch::IndexUpdate { index_uid, primary_key, task })) - } - BatchKind::IndexDeletion { ids } => Ok(Some(Batch::IndexDeletion { - index_uid, - index_has_been_created: must_create_index, - tasks: self.get_existing_tasks_for_processing_batch(rtxn, current_batch, ids)?, - })), - BatchKind::IndexSwap { id } => { - let mut task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; - current_batch.processing(Some(&mut task)); - Ok(Some(Batch::IndexSwap { task })) - } - } - } - - /// Create the next batch to be processed; - /// 1. We get the *last* task to cancel. - /// 2. We get the *next* task to delete. - /// 3. We get the *next* snapshot to process. - /// 4. We get the *next* dump to process. - /// 5. We get the *next* tasks to process for a specific index. - #[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")] - pub(crate) fn create_next_batch( - &self, - rtxn: &RoTxn, - ) -> Result> { - #[cfg(test)] - self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?; - - let batch_id = self.next_batch_id(rtxn)?; - let mut current_batch = ProcessingBatch::new(batch_id); - - let enqueued = &self.get_status(rtxn, Status::Enqueued)?; - let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued; - - // 1. we get the last task to cancel. - if let Some(task_id) = to_cancel.max() { - let mut task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; - current_batch.processing(Some(&mut task)); - return Ok(Some((Batch::TaskCancelation { task }, current_batch))); - } - - // 2. we get the next task to delete - let to_delete = self.get_kind(rtxn, Kind::TaskDeletion)? & enqueued; - if !to_delete.is_empty() { - let mut tasks = self.get_existing_tasks(rtxn, to_delete)?; - current_batch.processing(&mut tasks); - return Ok(Some((Batch::TaskDeletions(tasks), current_batch))); - } - - // 3. we batch the snapshot. 
- let to_snapshot = self.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued; - if !to_snapshot.is_empty() { - let mut tasks = self.get_existing_tasks(rtxn, to_snapshot)?; - current_batch.processing(&mut tasks); - return Ok(Some((Batch::SnapshotCreation(tasks), current_batch))); - } - - // 4. we batch the dumps. - let to_dump = self.get_kind(rtxn, Kind::DumpCreation)? & enqueued; - if let Some(to_dump) = to_dump.min() { - let mut task = self.get_task(rtxn, to_dump)?.ok_or(Error::CorruptedTaskQueue)?; - current_batch.processing(Some(&mut task)); - return Ok(Some((Batch::Dump(task), current_batch))); - } - - // 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task. - let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) }; - let mut task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; - - // If the task is not associated with any index, verify that it is an index swap and - // create the batch directly. Otherwise, get the index name associated with the task - // and use the autobatcher to batch the enqueued tasks associated with it - - let index_name = if let Some(&index_name) = task.indexes().first() { - index_name - } else { - assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty())); - current_batch.processing(Some(&mut task)); - return Ok(Some((Batch::IndexSwap { task }, current_batch))); - }; - - let index_already_exists = self.index_mapper.exists(rtxn, index_name)?; - let mut primary_key = None; - if index_already_exists { - let index = self.index_mapper.index(rtxn, index_name)?; - let rtxn = index.read_txn()?; - primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string()); - } - - let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued; - - // If autobatching is disabled we only take one task at a time. - // Otherwise, we take only a maximum of tasks to create batches. 
- let tasks_limit = - if self.autobatching_enabled { self.max_number_of_batched_tasks } else { 1 }; - - let enqueued = index_tasks - .into_iter() - .take(tasks_limit) - .map(|task_id| { - self.get_task(rtxn, task_id) - .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) - .map(|task| (task.uid, task.kind)) - }) - .collect::>>()?; - - if let Some((batchkind, create_index)) = - autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref()) - { - return Ok(self - .create_next_batch_index( - rtxn, - index_name.to_string(), - batchkind, - &mut current_batch, - create_index, - )? - .map(|batch| (batch, current_batch))); - } - - // If we found no tasks then we were notified for something that got autobatched - // somehow and there is nothing to do. - Ok(None) - } - - /// Apply the operation associated with the given batch. - /// - /// ## Return - /// The list of tasks that were processed. The metadata of each task in the returned - /// list is updated accordingly, with the exception of the its date fields - /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at). - #[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))] - pub(crate) fn process_batch( - &self, - batch: Batch, - current_batch: &mut ProcessingBatch, - progress: Progress, - ) -> Result> { - #[cfg(test)] - { - self.maybe_fail(crate::tests::FailureLocation::InsideProcessBatch)?; - self.maybe_fail(crate::tests::FailureLocation::PanicInsideProcessBatch)?; - self.breakpoint(crate::Breakpoint::InsideProcessBatch); - } - - match batch { - Batch::TaskCancelation { mut task } => { - // 1. Retrieve the tasks that matched the query at enqueue-time. 
- let matched_tasks = - if let KindWithContent::TaskCancelation { tasks, query: _ } = &task.kind { - tasks - } else { - unreachable!() - }; - - let rtxn = self.env.read_txn()?; - let mut canceled_tasks = self.cancel_matched_tasks( - &rtxn, - task.uid, - current_batch, - matched_tasks, - &progress, - )?; - - task.status = Status::Succeeded; - match &mut task.details { - Some(Details::TaskCancelation { - matched_tasks: _, - canceled_tasks: canceled_tasks_details, - original_filter: _, - }) => { - *canceled_tasks_details = Some(canceled_tasks.len() as u64); - } - _ => unreachable!(), - } - - canceled_tasks.push(task); - - Ok(canceled_tasks) - } - Batch::TaskDeletions(mut tasks) => { - // 1. Retrieve the tasks that matched the query at enqueue-time. - let mut matched_tasks = RoaringBitmap::new(); - - for task in tasks.iter() { - if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind { - matched_tasks |= tasks; - } else { - unreachable!() - } - } - - let mut wtxn = self.env.write_txn()?; - let mut deleted_tasks = - self.delete_matched_tasks(&mut wtxn, &matched_tasks, &progress)?; - wtxn.commit()?; - - for task in tasks.iter_mut() { - task.status = Status::Succeeded; - let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else { - unreachable!() - }; - - let deleted_tasks_count = deleted_tasks.intersection_len(tasks); - deleted_tasks -= tasks; - - match &mut task.details { - Some(Details::TaskDeletion { - matched_tasks: _, - deleted_tasks, - original_filter: _, - }) => { - *deleted_tasks = Some(deleted_tasks_count); - } - _ => unreachable!(), - } - } - Ok(tasks) - } - Batch::SnapshotCreation(mut tasks) => { - progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation); - - fs::create_dir_all(&self.snapshots_path)?; - let temp_snapshot_dir = tempfile::tempdir()?; - - // 1. Snapshot the version file. - let dst = temp_snapshot_dir.path().join(VERSION_FILE_NAME); - fs::copy(&self.version_file_path, dst)?; - - // 2. 
Snapshot the index-scheduler LMDB env - // - // When we call copy_to_file, LMDB opens a read transaction by itself, - // we can't provide our own. It is an issue as we would like to know - // the update files to copy but new ones can be enqueued between the copy - // of the env and the new transaction we open to retrieve the enqueued tasks. - // So we prefer opening a new transaction after copying the env and copy more - // update files than not enough. - // - // Note that there cannot be any update files deleted between those - // two read operations as the task processing is synchronous. - - // 2.1 First copy the LMDB env of the index-scheduler - progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler); - let dst = temp_snapshot_dir.path().join("tasks"); - fs::create_dir_all(&dst)?; - self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; - - // 2.2 Create a read transaction on the index-scheduler - let rtxn = self.env.read_txn()?; - - // 2.3 Create the update files directory - let update_files_dir = temp_snapshot_dir.path().join("update_files"); - fs::create_dir_all(&update_files_dir)?; - - // 2.4 Only copy the update files of the enqueued tasks - progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles); - let enqueued = self.get_status(&rtxn, Status::Enqueued)?; - let (atomic, update_file_progress) = - AtomicUpdateFileStep::new(enqueued.len() as u32); - progress.update_progress(update_file_progress); - for task_id in enqueued { - let task = self.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; - if let Some(content_uuid) = task.content_uuid() { - let src = self.file_store.get_update_path(content_uuid); - let dst = update_files_dir.join(content_uuid.to_string()); - fs::copy(src, dst)?; - } - atomic.fetch_add(1, Ordering::Relaxed); - } - - // 3. 
Snapshot every indexes - progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes); - let index_mapping = self.index_mapper.index_mapping; - let nb_indexes = index_mapping.len(&rtxn)? as u32; - - for (i, result) in index_mapping.iter(&rtxn)?.enumerate() { - let (name, uuid) = result?; - progress.update_progress(VariableNameStep::new(name, i as u32, nb_indexes)); - let index = self.index_mapper.index(&rtxn, name)?; - let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); - fs::create_dir_all(&dst)?; - index - .copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled) - .map_err(|e| Error::from_milli(e, Some(name.to_string())))?; - } - - drop(rtxn); - - // 4. Snapshot the auth LMDB env - progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys); - let dst = temp_snapshot_dir.path().join("auth"); - fs::create_dir_all(&dst)?; - // TODO We can't use the open_auth_store_env function here but we should - let auth = unsafe { - milli::heed::EnvOpenOptions::new() - .map_size(1024 * 1024 * 1024) // 1 GiB - .max_dbs(2) - .open(&self.auth_path) - }?; - auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; - - // 5. 
Copy and tarball the flat snapshot - progress.update_progress(SnapshotCreationProgress::CreateTheTarball); - // 5.1 Find the original name of the database - // TODO find a better way to get this path - let mut base_path = self.env.path().to_owned(); - base_path.pop(); - let db_name = base_path.file_name().and_then(OsStr::to_str).unwrap_or("data.ms"); - - // 5.2 Tarball the content of the snapshot in a tempfile with a .snapshot extension - let snapshot_path = self.snapshots_path.join(format!("{}.snapshot", db_name)); - let temp_snapshot_file = tempfile::NamedTempFile::new_in(&self.snapshots_path)?; - compression::to_tar_gz(temp_snapshot_dir.path(), temp_snapshot_file.path())?; - let file = temp_snapshot_file.persist(snapshot_path)?; - - // 5.3 Change the permission to make the snapshot readonly - let mut permissions = file.metadata()?.permissions(); - permissions.set_readonly(true); - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - #[allow(clippy::non_octal_unix_permissions)] - // rwxrwxrwx - permissions.set_mode(0b100100100); - } - - file.set_permissions(permissions)?; - - for task in &mut tasks { - task.status = Status::Succeeded; - } - - Ok(tasks) - } - Batch::Dump(mut task) => { - progress.update_progress(DumpCreationProgress::StartTheDumpCreation); - let started_at = OffsetDateTime::now_utc(); - let (keys, instance_uid) = - if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind { - (keys, instance_uid) - } else { - unreachable!(); - }; - let dump = dump::DumpWriter::new(*instance_uid)?; - - // 1. dump the keys - progress.update_progress(DumpCreationProgress::DumpTheApiKeys); - let mut dump_keys = dump.create_keys()?; - for key in keys { - dump_keys.push_key(key)?; - } - dump_keys.flush()?; - - let rtxn = self.env.read_txn()?; - - // 2. 
dump the tasks - progress.update_progress(DumpCreationProgress::DumpTheTasks); - let mut dump_tasks = dump.create_tasks_queue()?; - - let (atomic, update_task_progress) = - AtomicTaskStep::new(self.all_tasks.len(&rtxn)? as u32); - progress.update_progress(update_task_progress); - - for ret in self.all_tasks.iter(&rtxn)? { - if self.must_stop_processing.get() { - return Err(Error::AbortedTask); - } - - let (_, mut t) = ret?; - let status = t.status; - let content_file = t.content_uuid(); - - // In the case we're dumping ourselves we want to be marked as finished - // to not loop over ourselves indefinitely. - if t.uid == task.uid { - let finished_at = OffsetDateTime::now_utc(); - - // We're going to fake the date because we don't know if everything is going to go well. - // But we need to dump the task as finished and successful. - // If something fail everything will be set appropriately in the end. - t.status = Status::Succeeded; - t.started_at = Some(started_at); - t.finished_at = Some(finished_at); - } - let mut dump_content_file = dump_tasks.push_task(&t.into())?; - - // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. - if let Some(content_file) = content_file { - if self.must_stop_processing.get() { - return Err(Error::AbortedTask); - } - if status == Status::Enqueued { - let content_file = self.file_store.get_update(content_file)?; - - let reader = DocumentsBatchReader::from_reader(content_file) - .map_err(|e| Error::from_milli(e.into(), None))?; - - let (mut cursor, documents_batch_index) = - reader.into_cursor_and_fields_index(); - - while let Some(doc) = cursor - .next_document() - .map_err(|e| Error::from_milli(e.into(), None))? - { - dump_content_file.push_document( - &obkv_to_object(doc, &documents_batch_index) - .map_err(|e| Error::from_milli(e, None))?, - )?; - } - dump_content_file.flush()?; - } - } - atomic.fetch_add(1, Ordering::Relaxed); - } - dump_tasks.flush()?; - - // 3. 
Dump the indexes - progress.update_progress(DumpCreationProgress::DumpTheIndexes); - let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32; - let mut count = 0; - self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> { - progress.update_progress(VariableNameStep::new( - uid.to_string(), - count, - nb_indexes, - )); - count += 1; - - let rtxn = index.read_txn()?; - let metadata = IndexMetadata { - uid: uid.to_owned(), - primary_key: index.primary_key(&rtxn)?.map(String::from), - created_at: index - .created_at(&rtxn) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?, - updated_at: index - .updated_at(&rtxn) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?, - }; - let mut index_dumper = dump.create_index(uid, &metadata)?; - - let fields_ids_map = index.fields_ids_map(&rtxn)?; - let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); - let embedding_configs = index - .embedding_configs(&rtxn) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - - let nb_documents = index - .number_of_documents(&rtxn) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))? - as u32; - let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents); - progress.update_progress(update_document_progress); - let documents = index - .all_documents(&rtxn) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - // 3.1. 
Dump the documents - for ret in documents { - if self.must_stop_processing.get() { - return Err(Error::AbortedTask); - } - - let (id, doc) = - ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - - let mut document = - milli::obkv_to_json(&all_fields, &fields_ids_map, doc) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - - 'inject_vectors: { - let embeddings = index - .embeddings(&rtxn, id) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - - if embeddings.is_empty() { - break 'inject_vectors; - } - - let vectors = document - .entry(RESERVED_VECTORS_FIELD_NAME.to_owned()) - .or_insert(serde_json::Value::Object(Default::default())); - - let serde_json::Value::Object(vectors) = vectors else { - let user_err = milli::Error::UserError( - milli::UserError::InvalidVectorsMapType { - document_id: { - if let Ok(Some(Ok(index))) = index - .external_id_of(&rtxn, std::iter::once(id)) - .map(|it| it.into_iter().next()) - { - index - } else { - format!("internal docid={id}") - } - }, - value: vectors.clone(), - }, - ); - - return Err(Error::from_milli(user_err, Some(uid.to_string()))); - }; - - for (embedder_name, embeddings) in embeddings { - let user_provided = embedding_configs - .iter() - .find(|conf| conf.name == embedder_name) - .is_some_and(|conf| conf.user_provided.contains(id)); - - let embeddings = ExplicitVectors { - embeddings: Some( - VectorOrArrayOfVectors::from_array_of_vectors(embeddings), - ), - regenerate: !user_provided, - }; - vectors.insert( - embedder_name, - serde_json::to_value(embeddings).unwrap(), - ); - } - } - - index_dumper.push_document(&document)?; - atomic.fetch_add(1, Ordering::Relaxed); - } - - // 3.2. Dump the settings - let settings = meilisearch_types::settings::settings( - index, - &rtxn, - meilisearch_types::settings::SecretPolicy::RevealSecrets, - ) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - index_dumper.settings(&settings)?; - Ok(()) - })?; - - // 4. 
Dump experimental feature settings - progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures); - let features = self.features().runtime_features(); - dump.create_experimental_features(features)?; - - let dump_uid = started_at.format(format_description!( - "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" - )).unwrap(); - - if self.must_stop_processing.get() { - return Err(Error::AbortedTask); - } - progress.update_progress(DumpCreationProgress::CompressTheDump); - let path = self.dumps_path.join(format!("{}.dump", dump_uid)); - let file = File::create(path)?; - dump.persist_to(BufWriter::new(file))?; - - // if we reached this step we can tell the scheduler we succeeded to dump ourselves. - task.status = Status::Succeeded; - task.details = Some(Details::Dump { dump_uid: Some(dump_uid) }); - Ok(vec![task]) - } - Batch::IndexOperation { op, must_create_index } => { - let index_uid = op.index_uid().to_string(); - let index = if must_create_index { - // create the index if it doesn't already exist - let wtxn = self.env.write_txn()?; - self.index_mapper.create_index(wtxn, &index_uid, None)? - } else { - let rtxn = self.env.read_txn()?; - self.index_mapper.index(&rtxn, &index_uid)? - }; - - // the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick - self.index_mapper - .set_currently_updating_index(Some((index_uid.clone(), index.clone()))); - - let mut index_wtxn = index.write_txn()?; - let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?; - - { - let span = tracing::trace_span!(target: "indexing::scheduler", "commit"); - let _entered = span.enter(); - - index_wtxn.commit()?; - } - - // if the update processed successfully, we're going to store the new - // stats of the index. Since the tasks have already been processed and - // this is a non-critical operation. 
If it fails, we should not fail - // the entire batch. - let res = || -> Result<()> { - let index_rtxn = index.read_txn()?; - let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn) - .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?; - let mut wtxn = self.env.write_txn()?; - self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?; - wtxn.commit()?; - Ok(()) - }(); - - match res { - Ok(_) => (), - Err(e) => tracing::error!( - error = &e as &dyn std::error::Error, - "Could not write the stats of the index" - ), - } - - Ok(tasks) - } - Batch::IndexCreation { index_uid, primary_key, task } => { - progress.update_progress(CreateIndexProgress::CreatingTheIndex); - - let wtxn = self.env.write_txn()?; - if self.index_mapper.exists(&wtxn, &index_uid)? { - return Err(Error::IndexAlreadyExists(index_uid)); - } - self.index_mapper.create_index(wtxn, &index_uid, None)?; - - self.process_batch( - Batch::IndexUpdate { index_uid, primary_key, task }, - current_batch, - progress, - ) - } - Batch::IndexUpdate { index_uid, primary_key, mut task } => { - progress.update_progress(UpdateIndexProgress::UpdatingTheIndex); - let rtxn = self.env.read_txn()?; - let index = self.index_mapper.index(&rtxn, &index_uid)?; - - if let Some(primary_key) = primary_key.clone() { - let mut index_wtxn = index.write_txn()?; - let mut builder = MilliSettings::new( - &mut index_wtxn, - &index, - self.index_mapper.indexer_config(), - ); - builder.set_primary_key(primary_key); - let must_stop_processing = self.must_stop_processing.clone(); - builder - .execute( - |indexing_step| tracing::debug!(update = ?indexing_step), - || must_stop_processing.get(), - ) - .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?; - index_wtxn.commit()?; - } - - // drop rtxn before starting a new wtxn on the same db - rtxn.commit()?; - - task.status = Status::Succeeded; - task.details = Some(Details::IndexInfo { primary_key }); - - // if the update processed successfully, we're 
going to store the new - // stats of the index. Since the tasks have already been processed and - // this is a non-critical operation. If it fails, we should not fail - // the entire batch. - let res = || -> Result<()> { - let mut wtxn = self.env.write_txn()?; - let index_rtxn = index.read_txn()?; - let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn) - .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; - self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?; - wtxn.commit()?; - Ok(()) - }(); - - match res { - Ok(_) => (), - Err(e) => tracing::error!( - error = &e as &dyn std::error::Error, - "Could not write the stats of the index" - ), - } - - Ok(vec![task]) - } - Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => { - progress.update_progress(DeleteIndexProgress::DeletingTheIndex); - let wtxn = self.env.write_txn()?; - - // it's possible that the index doesn't exist - let number_of_documents = || -> Result { - let index = self.index_mapper.index(&wtxn, &index_uid)?; - let index_rtxn = index.read_txn()?; - index - .number_of_documents(&index_rtxn) - .map_err(|e| Error::from_milli(e, Some(index_uid.to_string()))) - }() - .unwrap_or_default(); - - // The write transaction is directly owned and committed inside. - match self.index_mapper.delete_index(wtxn, &index_uid) { - Ok(()) => (), - Err(Error::IndexNotFound(_)) if index_has_been_created => (), - Err(e) => return Err(e), - } - - // We set all the tasks details to the default value. - for task in &mut tasks { - task.status = Status::Succeeded; - task.details = match &task.kind { - KindWithContent::IndexDeletion { .. 
} => { - Some(Details::ClearAll { deleted_documents: Some(number_of_documents) }) - } - otherwise => otherwise.default_finished_details(), - }; - } - - Ok(tasks) - } - Batch::IndexSwap { mut task } => { - progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap); - - let mut wtxn = self.env.write_txn()?; - let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind { - swaps - } else { - unreachable!() - }; - let mut not_found_indexes = BTreeSet::new(); - for IndexSwap { indexes: (lhs, rhs) } in swaps { - for index in [lhs, rhs] { - let index_exists = self.index_mapper.index_exists(&wtxn, index)?; - if !index_exists { - not_found_indexes.insert(index); - } - } - } - if !not_found_indexes.is_empty() { - if not_found_indexes.len() == 1 { - return Err(Error::SwapIndexNotFound( - not_found_indexes.into_iter().next().unwrap().clone(), - )); - } else { - return Err(Error::SwapIndexesNotFound( - not_found_indexes.into_iter().cloned().collect(), - )); - } - } - progress.update_progress(SwappingTheIndexes::SwappingTheIndexes); - for (step, swap) in swaps.iter().enumerate() { - progress.update_progress(VariableNameStep::new( - format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1), - step as u32, - swaps.len() as u32, - )); - self.apply_index_swap( - &mut wtxn, - &progress, - task.uid, - &swap.indexes.0, - &swap.indexes.1, - )?; - } - wtxn.commit()?; - task.status = Status::Succeeded; - Ok(vec![task]) - } - } - } - - /// Swap the index `lhs` with the index `rhs`. - fn apply_index_swap( - &self, - wtxn: &mut RwTxn, - progress: &Progress, - task_id: u32, - lhs: &str, - rhs: &str, - ) -> Result<()> { - progress.update_progress(InnerSwappingTwoIndexes::RetrieveTheTasks); - // 1. 
Verify that both lhs and rhs are existing indexes - let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?; - if !index_lhs_exists { - return Err(Error::IndexNotFound(lhs.to_owned())); - } - let index_rhs_exists = self.index_mapper.index_exists(wtxn, rhs)?; - if !index_rhs_exists { - return Err(Error::IndexNotFound(rhs.to_owned())); - } - - // 2. Get the task set for index = name that appeared before the index swap task - let mut index_lhs_task_ids = self.index_tasks(wtxn, lhs)?; - index_lhs_task_ids.remove_range(task_id..); - let mut index_rhs_task_ids = self.index_tasks(wtxn, rhs)?; - index_rhs_task_ids.remove_range(task_id..); - - // 3. before_name -> new_name in the task's KindWithContent - progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks); - let tasks_to_update = &index_lhs_task_ids | &index_rhs_task_ids; - let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u32); - progress.update_progress(task_progress); - - for task_id in tasks_to_update { - let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; - swap_index_uid_in_task(&mut task, (lhs, rhs)); - self.all_tasks.put(wtxn, &task_id, &task)?; - atomic.fetch_add(1, Ordering::Relaxed); - } - - // 4. remove the task from indexuid = before_name - // 5. add the task to indexuid = after_name - progress.update_progress(InnerSwappingTwoIndexes::UpdateTheIndexesMetadata); - self.update_index(wtxn, lhs, |lhs_tasks| { - *lhs_tasks -= &index_lhs_task_ids; - *lhs_tasks |= &index_rhs_task_ids; - })?; - self.update_index(wtxn, rhs, |rhs_tasks| { - *rhs_tasks -= &index_rhs_task_ids; - *rhs_tasks |= &index_lhs_task_ids; - })?; - - // 6. Swap in the index mapper - self.index_mapper.swap(wtxn, lhs, rhs)?; - - Ok(()) - } - - /// Process the index operation on the given index. - /// - /// ## Return - /// The list of processed tasks. 
- #[tracing::instrument( - level = "trace", - skip(self, index_wtxn, index, progress), - target = "indexing::scheduler" - )] - fn apply_index_operation<'i>( - &self, - index_wtxn: &mut RwTxn<'i>, - index: &'i Index, - operation: IndexOperation, - progress: Progress, - ) -> Result> { - let indexer_alloc = Bump::new(); - - let started_processing_at = std::time::Instant::now(); - let must_stop_processing = self.must_stop_processing.clone(); - - match operation { - IndexOperation::DocumentClear { index_uid, mut tasks } => { - let count = milli::update::ClearDocuments::new(index_wtxn, index) - .execute() - .map_err(|e| Error::from_milli(e, Some(index_uid)))?; - - let mut first_clear_found = false; - for task in &mut tasks { - task.status = Status::Succeeded; - // The first document clear will effectively delete every documents - // in the database but the next ones will clear 0 documents. - task.details = match &task.kind { - KindWithContent::DocumentClear { .. } => { - let count = if first_clear_found { 0 } else { count }; - first_clear_found = true; - Some(Details::ClearAll { deleted_documents: Some(count) }) - } - otherwise => otherwise.default_details(), - }; - } - - Ok(tasks) - } - IndexOperation::DocumentOperation { - index_uid, - primary_key, - method, - operations, - mut tasks, - } => { - progress.update_progress(DocumentOperationProgress::RetrievingConfig); - // TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches. - // this is made difficult by the fact we're doing private clones of the index scheduler and sending it - // to a fresh thread. - let mut content_files = Vec::new(); - for operation in &operations { - if let DocumentOperation::Add(content_uuid) = operation { - let content_file = self.file_store.get_update(*content_uuid)?; - let mmap = unsafe { memmap2::Mmap::map(&content_file)? 
}; - if !mmap.is_empty() { - content_files.push(mmap); - } - } - } - - let rtxn = index.read_txn()?; - let db_fields_ids_map = index.fields_ids_map(&rtxn)?; - let mut new_fields_ids_map = db_fields_ids_map.clone(); - - let mut content_files_iter = content_files.iter(); - let mut indexer = indexer::DocumentOperation::new(method); - let embedders = index - .embedding_configs(index_wtxn) - .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; - let embedders = self.embedders(index_uid.clone(), embedders)?; - for operation in operations { - match operation { - DocumentOperation::Add(_content_uuid) => { - let mmap = content_files_iter.next().unwrap(); - indexer - .add_documents(mmap) - .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; - } - DocumentOperation::Delete(document_ids) => { - let document_ids: bumpalo::collections::vec::Vec<_> = document_ids - .iter() - .map(|s| &*indexer_alloc.alloc_str(s)) - .collect_in(&indexer_alloc); - indexer.delete_documents(document_ids.into_bump_slice()); - } - } - } - - let local_pool; - let indexer_config = self.index_mapper.indexer_config(); - let pool = match &indexer_config.thread_pool { - Some(pool) => pool, - None => { - local_pool = ThreadPoolNoAbortBuilder::new() - .thread_name(|i| format!("indexing-thread-{i}")) - .build() - .unwrap(); - &local_pool - } - }; - - progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges); - let (document_changes, operation_stats, primary_key) = indexer - .into_changes( - &indexer_alloc, - index, - &rtxn, - primary_key.as_deref(), - &mut new_fields_ids_map, - &|| must_stop_processing.get(), - progress.clone(), - ) - .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; - - let mut candidates_count = 0; - for (stats, task) in operation_stats.into_iter().zip(&mut tasks) { - candidates_count += stats.document_count; - match stats.error { - Some(error) => { - task.status = Status::Failed; - task.error = Some(milli::Error::UserError(error).into()); - 
} - None => task.status = Status::Succeeded, - } - - task.details = match task.details { - Some(Details::DocumentAdditionOrUpdate { received_documents, .. }) => { - Some(Details::DocumentAdditionOrUpdate { - received_documents, - indexed_documents: Some(stats.document_count), - }) - } - Some(Details::DocumentDeletion { provided_ids, .. }) => { - Some(Details::DocumentDeletion { - provided_ids, - deleted_documents: Some(stats.document_count), - }) - } - _ => { - // In the case of a `documentAdditionOrUpdate` or `DocumentDeletion` - // the details MUST be set to either addition or deletion - unreachable!(); - } - } - } - - progress.update_progress(DocumentOperationProgress::Indexing); - if tasks.iter().any(|res| res.error.is_none()) { - indexer::index( - index_wtxn, - index, - pool, - indexer_config.grenad_parameters(), - &db_fields_ids_map, - new_fields_ids_map, - primary_key, - &document_changes, - embedders, - &|| must_stop_processing.get(), - &progress, - ) - .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; - - let addition = DocumentAdditionResult { - indexed_documents: candidates_count, - number_of_documents: index - .number_of_documents(index_wtxn) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, - }; - - tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); - } - - Ok(tasks) - } - IndexOperation::DocumentEdition { index_uid, mut task } => { - progress.update_progress(DocumentEditionProgress::RetrievingConfig); - - let (filter, code) = if let KindWithContent::DocumentEdition { - filter_expr, - context: _, - function, - .. 
- } = &task.kind - { - (filter_expr, function) - } else { - unreachable!() - }; - - let candidates = match filter.as_ref().map(Filter::from_json) { - Some(Ok(Some(filter))) => filter - .evaluate(index_wtxn, index) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, - None | Some(Ok(None)) => index.documents_ids(index_wtxn)?, - Some(Err(e)) => return Err(Error::from_milli(e, Some(index_uid.clone()))), - }; - - let (original_filter, context, function) = if let Some(Details::DocumentEdition { - original_filter, - context, - function, - .. - }) = task.details - { - (original_filter, context, function) - } else { - // In the case of a `documentEdition` the details MUST be set - unreachable!(); - }; - - if candidates.is_empty() { - task.status = Status::Succeeded; - task.details = Some(Details::DocumentEdition { - original_filter, - context, - function, - deleted_documents: Some(0), - edited_documents: Some(0), - }); - - return Ok(vec![task]); - } - - let rtxn = index.read_txn()?; - let db_fields_ids_map = index.fields_ids_map(&rtxn)?; - let mut new_fields_ids_map = db_fields_ids_map.clone(); - // candidates not empty => index not empty => a primary key is set - let primary_key = index.primary_key(&rtxn)?.unwrap(); - - let primary_key = - PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map) - .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?; - - let result_count = Ok((candidates.len(), candidates.len())) as Result<_>; - - if task.error.is_none() { - let local_pool; - let indexer_config = self.index_mapper.indexer_config(); - let pool = match &indexer_config.thread_pool { - Some(pool) => pool, - None => { - local_pool = ThreadPoolNoAbortBuilder::new() - .thread_name(|i| format!("indexing-thread-{i}")) - .build() - .unwrap(); - &local_pool - } - }; - - let candidates_count = candidates.len(); - progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges); - let indexer = UpdateByFunction::new(candidates, 
context.clone(), code.clone()); - let document_changes = pool - .install(|| { - indexer - .into_changes(&primary_key) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone()))) - }) - .unwrap()?; - let embedders = index - .embedding_configs(index_wtxn) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; - let embedders = self.embedders(index_uid.clone(), embedders)?; - - progress.update_progress(DocumentEditionProgress::Indexing); - indexer::index( - index_wtxn, - index, - pool, - indexer_config.grenad_parameters(), - &db_fields_ids_map, - new_fields_ids_map, - None, // cannot change primary key in DocumentEdition - &document_changes, - embedders, - &|| must_stop_processing.get(), - &progress, - ) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; - - let addition = DocumentAdditionResult { - indexed_documents: candidates_count, - number_of_documents: index - .number_of_documents(index_wtxn) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, - }; - - tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); - } - - match result_count { - Ok((deleted_documents, edited_documents)) => { - task.status = Status::Succeeded; - task.details = Some(Details::DocumentEdition { - original_filter, - context, - function, - deleted_documents: Some(deleted_documents), - edited_documents: Some(edited_documents), - }); - } - Err(e) => { - task.status = Status::Failed; - task.details = Some(Details::DocumentEdition { - original_filter, - context, - function, - deleted_documents: Some(0), - edited_documents: Some(0), - }); - task.error = Some(e.into()); - } - } - - Ok(vec![task]) - } - IndexOperation::DocumentDeletion { mut tasks, index_uid } => { - progress.update_progress(DocumentDeletionProgress::RetrievingConfig); - - let mut to_delete = RoaringBitmap::new(); - let external_documents_ids = index.external_documents_ids(); - - for task in tasks.iter_mut() { - let 
before = to_delete.len(); - task.status = Status::Succeeded; - - match &task.kind { - KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => { - for id in documents_ids { - if let Some(id) = external_documents_ids.get(index_wtxn, id)? { - to_delete.insert(id); - } - } - let will_be_removed = to_delete.len() - before; - task.details = Some(Details::DocumentDeletion { - provided_ids: documents_ids.len(), - deleted_documents: Some(will_be_removed), - }); - } - KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } => { - let before = to_delete.len(); - let filter = match Filter::from_json(filter_expr) { - Ok(filter) => filter, - Err(err) => { - // theorically, this should be catched by deserr before reaching the index-scheduler and cannot happens - task.status = Status::Failed; - task.error = Some( - Error::from_milli(err, Some(index_uid.clone())).into(), - ); - None - } - }; - if let Some(filter) = filter { - let candidates = filter - .evaluate(index_wtxn, index) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone()))); - match candidates { - Ok(candidates) => to_delete |= candidates, - Err(err) => { - task.status = Status::Failed; - task.error = Some(err.into()); - } - }; - } - let will_be_removed = to_delete.len() - before; - if let Some(Details::DocumentDeletionByFilter { - original_filter: _, - deleted_documents, - }) = &mut task.details - { - *deleted_documents = Some(will_be_removed); - } else { - // In the case of a `documentDeleteByFilter` the details MUST be set - unreachable!() - } - } - _ => unreachable!(), - } - } - - if to_delete.is_empty() { - return Ok(tasks); - } - - let rtxn = index.read_txn()?; - let db_fields_ids_map = index.fields_ids_map(&rtxn)?; - let mut new_fields_ids_map = db_fields_ids_map.clone(); - - // to_delete not empty => index not empty => primary key set - let primary_key = index.primary_key(&rtxn)?.unwrap(); - - let primary_key = - PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map) - 
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?; - - if !tasks.iter().all(|res| res.error.is_some()) { - let local_pool; - let indexer_config = self.index_mapper.indexer_config(); - let pool = match &indexer_config.thread_pool { - Some(pool) => pool, - None => { - local_pool = ThreadPoolNoAbortBuilder::new() - .thread_name(|i| format!("indexing-thread-{i}")) - .build() - .unwrap(); - &local_pool - } - }; - - progress.update_progress(DocumentDeletionProgress::DeleteDocuments); - let mut indexer = indexer::DocumentDeletion::new(); - let candidates_count = to_delete.len(); - indexer.delete_documents_by_docids(to_delete); - let document_changes = indexer.into_changes(&indexer_alloc, primary_key); - let embedders = index - .embedding_configs(index_wtxn) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; - let embedders = self.embedders(index_uid.clone(), embedders)?; - - progress.update_progress(DocumentDeletionProgress::Indexing); - indexer::index( - index_wtxn, - index, - pool, - indexer_config.grenad_parameters(), - &db_fields_ids_map, - new_fields_ids_map, - None, // document deletion never changes primary key - &document_changes, - embedders, - &|| must_stop_processing.get(), - &progress, - ) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; - - let addition = DocumentAdditionResult { - indexed_documents: candidates_count, - number_of_documents: index - .number_of_documents(index_wtxn) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, - }; - - tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); - } - - Ok(tasks) - } - IndexOperation::Settings { index_uid, settings, mut tasks } => { - progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings); - let indexer_config = self.index_mapper.indexer_config(); - let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config); - - for (task, (_, 
settings)) in tasks.iter_mut().zip(settings) { - let checked_settings = settings.clone().check(); - task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) }); - apply_settings_to_builder(&checked_settings, &mut builder); - - // We can apply the status right now and if an update fail later - // the whole batch will be marked as failed. - task.status = Status::Succeeded; - } - - progress.update_progress(SettingsProgress::ApplyTheSettings); - builder - .execute( - |indexing_step| tracing::debug!(update = ?indexing_step), - || must_stop_processing.get(), - ) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; - - Ok(tasks) - } - IndexOperation::DocumentClearAndSetting { - index_uid, - cleared_tasks, - settings, - settings_tasks, - } => { - let mut import_tasks = self.apply_index_operation( - index_wtxn, - index, - IndexOperation::DocumentClear { - index_uid: index_uid.clone(), - tasks: cleared_tasks, - }, - progress.clone(), - )?; - - let settings_tasks = self.apply_index_operation( - index_wtxn, - index, - IndexOperation::Settings { index_uid, settings, tasks: settings_tasks }, - progress, - )?; - - let mut tasks = settings_tasks; - tasks.append(&mut import_tasks); - Ok(tasks) - } - } - } - - /// Delete each given task from all the databases (if it is deleteable). - /// - /// Return the number of tasks that were actually deleted. - fn delete_matched_tasks( - &self, - wtxn: &mut RwTxn, - matched_tasks: &RoaringBitmap, - progress: &Progress, - ) -> Result { - progress.update_progress(TaskDeletionProgress::DeletingTasksDateTime); - - // 1. 
Remove from this list the tasks that we are not allowed to delete - let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?; - let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone(); - - let all_task_ids = self.all_task_ids(wtxn)?; - let mut to_delete_tasks = all_task_ids & matched_tasks; - to_delete_tasks -= &**processing_tasks; - to_delete_tasks -= &enqueued_tasks; - - // 2. We now have a list of tasks to delete, delete them - - let mut affected_indexes = HashSet::new(); - let mut affected_statuses = HashSet::new(); - let mut affected_kinds = HashSet::new(); - let mut affected_canceled_by = RoaringBitmap::new(); - // The tasks that have been removed *per batches*. - let mut affected_batches: HashMap = HashMap::new(); - - let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32); - progress.update_progress(task_progress); - for task_id in to_delete_tasks.iter() { - let task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; - - affected_indexes.extend(task.indexes().into_iter().map(|x| x.to_owned())); - affected_statuses.insert(task.status); - affected_kinds.insert(task.kind.as_kind()); - // Note: don't delete the persisted task data since - // we can only delete succeeded, failed, and canceled tasks. - // In each of those cases, the persisted data is supposed to - // have been deleted already. 
- utils::remove_task_datetime(wtxn, self.enqueued_at, task.enqueued_at, task.uid)?; - if let Some(started_at) = task.started_at { - utils::remove_task_datetime(wtxn, self.started_at, started_at, task.uid)?; - } - if let Some(finished_at) = task.finished_at { - utils::remove_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?; - } - if let Some(canceled_by) = task.canceled_by { - affected_canceled_by.insert(canceled_by); - } - if let Some(batch_uid) = task.batch_uid { - affected_batches.entry(batch_uid).or_default().insert(task_id); - } - atomic_progress.fetch_add(1, Ordering::Relaxed); - } - - progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata); - let (atomic_progress, task_progress) = AtomicTaskStep::new( - (affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32, - ); - progress.update_progress(task_progress); - for index in affected_indexes.iter() { - self.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?; - atomic_progress.fetch_add(1, Ordering::Relaxed); - } - - for status in affected_statuses.iter() { - self.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?; - atomic_progress.fetch_add(1, Ordering::Relaxed); - } - - for kind in affected_kinds.iter() { - self.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?; - atomic_progress.fetch_add(1, Ordering::Relaxed); - } - - progress.update_progress(TaskDeletionProgress::DeletingTasks); - let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32); - progress.update_progress(task_progress); - for task in to_delete_tasks.iter() { - self.all_tasks.delete(wtxn, &task)?; - atomic_progress.fetch_add(1, Ordering::Relaxed); - } - for canceled_by in affected_canceled_by { - if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? 
{ - tasks -= &to_delete_tasks; - if tasks.is_empty() { - self.canceled_by.delete(wtxn, &canceled_by)?; - } else { - self.canceled_by.put(wtxn, &canceled_by, &tasks)?; - } - } - } - progress.update_progress(TaskDeletionProgress::DeletingBatches); - let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32); - progress.update_progress(batch_progress); - for (batch_id, to_delete_tasks) in affected_batches { - if let Some(mut tasks) = self.batch_to_tasks_mapping.get(wtxn, &batch_id)? { - tasks -= &to_delete_tasks; - // We must remove the batch entirely - if tasks.is_empty() { - self.all_batches.delete(wtxn, &batch_id)?; - self.batch_to_tasks_mapping.delete(wtxn, &batch_id)?; - } - // Anyway, we must remove the batch from all its reverse indexes. - // The only way to do that is to check - - for index in affected_indexes.iter() { - let index_tasks = self.index_tasks(wtxn, index)?; - let remaining_index_tasks = index_tasks & &tasks; - if remaining_index_tasks.is_empty() { - self.update_batch_index(wtxn, index, |bitmap| { - bitmap.remove(batch_id); - })?; - } - } - - for status in affected_statuses.iter() { - let status_tasks = self.get_status(wtxn, *status)?; - let remaining_status_tasks = status_tasks & &tasks; - if remaining_status_tasks.is_empty() { - self.update_batch_status(wtxn, *status, |bitmap| { - bitmap.remove(batch_id); - })?; - } - } - - for kind in affected_kinds.iter() { - let kind_tasks = self.get_kind(wtxn, *kind)?; - let remaining_kind_tasks = kind_tasks & &tasks; - if remaining_kind_tasks.is_empty() { - self.update_batch_kind(wtxn, *kind, |bitmap| { - bitmap.remove(batch_id); - })?; - } - } - } - atomic_progress.fetch_add(1, Ordering::Relaxed); - } - - Ok(to_delete_tasks) - } - - /// Cancel each given task from all the databases (if it is cancelable). - /// - /// Returns the list of tasks that matched the filter and must be written in the database. 
- fn cancel_matched_tasks( - &self, - rtxn: &RoTxn, - cancel_task_id: TaskId, - current_batch: &mut ProcessingBatch, - matched_tasks: &RoaringBitmap, - progress: &Progress, - ) -> Result> { - progress.update_progress(TaskCancelationProgress::RetrievingTasks); - - // 1. Remove from this list the tasks that we are not allowed to cancel - // Notice that only the _enqueued_ ones are cancelable and we should - // have already aborted the indexation of the _processing_ ones - let cancelable_tasks = self.get_status(rtxn, Status::Enqueued)?; - let tasks_to_cancel = cancelable_tasks & matched_tasks; - - let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32); - progress.update_progress(progress_obj); - - // 2. We now have a list of tasks to cancel, cancel them - let mut tasks = self.get_existing_tasks( - rtxn, - tasks_to_cancel.iter().inspect(|_| { - task_progress.fetch_add(1, Ordering::Relaxed); - }), - )?; - - progress.update_progress(TaskCancelationProgress::UpdatingTasks); - let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32); - progress.update_progress(progress_obj); - for task in tasks.iter_mut() { - task.status = Status::Canceled; - task.canceled_by = Some(cancel_task_id); - task.details = task.details.as_ref().map(|d| d.to_failed()); - current_batch.processing(Some(task)); - task_progress.fetch_add(1, Ordering::Relaxed); - } - - Ok(tasks) - } -} diff --git a/crates/index-scheduler/src/dump.rs b/crates/index-scheduler/src/dump.rs new file mode 100644 index 000000000..643255ac2 --- /dev/null +++ b/crates/index-scheduler/src/dump.rs @@ -0,0 +1,203 @@ +use std::collections::HashMap; + +use dump::{KindDump, TaskDump, UpdateFile}; +use meilisearch_types::heed::RwTxn; +use meilisearch_types::milli::documents::DocumentsBatchBuilder; +use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; +use roaring::RoaringBitmap; +use uuid::Uuid; + +use crate::{utils, Error, IndexScheduler, Result}; + +pub 
struct Dump<'a> { + index_scheduler: &'a IndexScheduler, + wtxn: RwTxn<'a>, + + indexes: HashMap, + statuses: HashMap, + kinds: HashMap, +} + +impl<'a> Dump<'a> { + pub(crate) fn new(index_scheduler: &'a mut IndexScheduler) -> Result { + // While loading a dump no one should be able to access the scheduler thus I can block everything. + let wtxn = index_scheduler.env.write_txn()?; + + Ok(Dump { + index_scheduler, + wtxn, + indexes: HashMap::new(), + statuses: HashMap::new(), + kinds: HashMap::new(), + }) + } + + /// Register a new task coming from a dump in the scheduler. + /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running. + pub fn register_dumped_task( + &mut self, + task: TaskDump, + content_file: Option>, + ) -> Result { + let content_uuid = match content_file { + Some(content_file) if task.status == Status::Enqueued => { + let (uuid, mut file) = self.index_scheduler.queue.create_update_file(false)?; + let mut builder = DocumentsBatchBuilder::new(&mut file); + for doc in content_file { + builder.append_json_object(&doc?)?; + } + builder.into_inner()?; + file.persist()?; + + Some(uuid) + } + // If the task isn't `Enqueued` then just generate a recognisable `Uuid` + // in case we try to open it later. 
+ _ if task.status != Status::Enqueued => Some(Uuid::nil()), + _ => None, + }; + + let task = Task { + uid: task.uid, + batch_uid: task.batch_uid, + enqueued_at: task.enqueued_at, + started_at: task.started_at, + finished_at: task.finished_at, + error: task.error, + canceled_by: task.canceled_by, + details: task.details, + status: task.status, + kind: match task.kind { + KindDump::DocumentImport { + primary_key, + method, + documents_count, + allow_index_creation, + } => KindWithContent::DocumentAdditionOrUpdate { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + primary_key, + method, + content_file: content_uuid.ok_or(Error::CorruptedDump)?, + documents_count, + allow_index_creation, + }, + KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion { + documents_ids, + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + }, + KindDump::DocumentDeletionByFilter { filter } => { + KindWithContent::DocumentDeletionByFilter { + filter_expr: filter, + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + } + } + KindDump::DocumentEdition { filter, context, function } => { + KindWithContent::DocumentEdition { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + filter_expr: filter, + context, + function, + } + } + KindDump::DocumentClear => KindWithContent::DocumentClear { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + }, + KindDump::Settings { settings, is_deletion, allow_index_creation } => { + KindWithContent::SettingsUpdate { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + new_settings: settings, + is_deletion, + allow_index_creation, + } + } + KindDump::IndexDeletion => KindWithContent::IndexDeletion { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + }, + KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + primary_key, + }, + KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate { + 
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + primary_key, + }, + KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps }, + KindDump::TaskCancelation { query, tasks } => { + KindWithContent::TaskCancelation { query, tasks } + } + KindDump::TasksDeletion { query, tasks } => { + KindWithContent::TaskDeletion { query, tasks } + } + KindDump::DumpCreation { keys, instance_uid } => { + KindWithContent::DumpCreation { keys, instance_uid } + } + KindDump::SnapshotCreation => KindWithContent::SnapshotCreation, + }, + }; + + self.index_scheduler.queue.tasks.all_tasks.put(&mut self.wtxn, &task.uid, &task)?; + + for index in task.indexes() { + match self.indexes.get_mut(index) { + Some(bitmap) => { + bitmap.insert(task.uid); + } + None => { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert(task.uid); + self.indexes.insert(index.to_string(), bitmap); + } + }; + } + + utils::insert_task_datetime( + &mut self.wtxn, + self.index_scheduler.queue.tasks.enqueued_at, + task.enqueued_at, + task.uid, + )?; + + // we can't override the started_at & finished_at, so we must only set it if the tasks is finished and won't change + if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) { + if let Some(started_at) = task.started_at { + utils::insert_task_datetime( + &mut self.wtxn, + self.index_scheduler.queue.tasks.started_at, + started_at, + task.uid, + )?; + } + if let Some(finished_at) = task.finished_at { + utils::insert_task_datetime( + &mut self.wtxn, + self.index_scheduler.queue.tasks.finished_at, + finished_at, + task.uid, + )?; + } + } + + self.statuses.entry(task.status).or_default().insert(task.uid); + self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid); + + Ok(task) + } + + /// Commit all the changes and exit the importing dump state + pub fn finish(mut self) -> Result<()> { + for (index, bitmap) in self.indexes { + self.index_scheduler.queue.tasks.index_tasks.put(&mut self.wtxn, &index, &bitmap)?; + } + 
for (status, bitmap) in self.statuses { + self.index_scheduler.queue.tasks.put_status(&mut self.wtxn, status, &bitmap)?; + } + for (kind, bitmap) in self.kinds { + self.index_scheduler.queue.tasks.put_kind(&mut self.wtxn, kind, &bitmap)?; + } + + self.wtxn.commit()?; + self.index_scheduler.scheduler.wake_up.signal(); + + Ok(()) + } +} diff --git a/crates/index-scheduler/src/index_mapper/index_map.rs b/crates/index-scheduler/src/index_mapper/index_map.rs index 480dafa7c..947f558aa 100644 --- a/crates/index-scheduler/src/index_mapper/index_map.rs +++ b/crates/index-scheduler/src/index_mapper/index_map.rs @@ -323,7 +323,7 @@ mod tests { use uuid::Uuid; use super::super::IndexMapper; - use crate::tests::IndexSchedulerHandle; + use crate::test_utils::IndexSchedulerHandle; use crate::utils::clamp_to_page_size; use crate::IndexScheduler; diff --git a/crates/index-scheduler/src/index_mapper/mod.rs b/crates/index-scheduler/src/index_mapper/mod.rs index 98272542b..77cccf9b1 100644 --- a/crates/index-scheduler/src/index_mapper/mod.rs +++ b/crates/index-scheduler/src/index_mapper/mod.rs @@ -16,7 +16,7 @@ use uuid::Uuid; use self::index_map::IndexMap; use self::IndexStatus::{Available, BeingDeleted, Closing, Missing}; use crate::uuid_codec::UuidCodec; -use crate::{Error, Result}; +use crate::{Error, IndexBudget, IndexSchedulerOptions, Result}; mod index_map; @@ -140,27 +140,19 @@ impl IndexStats { impl IndexMapper { pub fn new( env: &Env, - base_path: PathBuf, - index_base_map_size: usize, - index_growth_amount: usize, - index_count: usize, - enable_mdb_writemap: bool, - indexer_config: IndexerConfig, + wtxn: &mut RwTxn, + options: &IndexSchedulerOptions, + budget: IndexBudget, ) -> Result { - let mut wtxn = env.write_txn()?; - let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?; - let index_stats = env.create_database(&mut wtxn, Some(INDEX_STATS))?; - wtxn.commit()?; - Ok(Self { - index_map: Arc::new(RwLock::new(IndexMap::new(index_count))), - 
index_mapping, - index_stats, - base_path, - index_base_map_size, - index_growth_amount, - enable_mdb_writemap, - indexer_config: Arc::new(indexer_config), + index_map: Arc::new(RwLock::new(IndexMap::new(budget.index_count))), + index_mapping: env.create_database(wtxn, Some(INDEX_MAPPING))?, + index_stats: env.create_database(wtxn, Some(INDEX_STATS))?, + base_path: options.indexes_path.clone(), + index_base_map_size: budget.map_size, + index_growth_amount: options.index_growth_amount, + enable_mdb_writemap: options.enable_mdb_writemap, + indexer_config: options.indexer_config.clone(), currently_updating_index: Default::default(), }) } @@ -199,6 +191,11 @@ impl IndexMapper { self.index_base_map_size, ) .map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?; + let index_rtxn = index.read_txn()?; + let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn) + .map_err(|e| Error::from_milli(e, Some(name.to_string())))?; + self.store_stats_of(&mut wtxn, name, &stats)?; + drop(index_rtxn); wtxn.commit()?; diff --git a/crates/index-scheduler/src/insta_snapshot.rs b/crates/index-scheduler/src/insta_snapshot.rs index 67627d8c1..de79cd7c0 100644 --- a/crates/index-scheduler/src/insta_snapshot.rs +++ b/crates/index-scheduler/src/insta_snapshot.rs @@ -5,11 +5,11 @@ use meilisearch_types::batches::Batch; use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str}; use meilisearch_types::heed::{Database, RoTxn}; use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; -use meilisearch_types::tasks::{Details, Task}; +use meilisearch_types::tasks::{Details, Kind, Status, Task}; use roaring::RoaringBitmap; use crate::index_mapper::IndexMapper; -use crate::{IndexScheduler, Kind, Status, BEI128}; +use crate::{IndexScheduler, BEI128}; pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { // Since we'll snapshot the index right afterward, we don't need to ensure it's internally consistent for every run. 
@@ -18,41 +18,14 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { scheduler.assert_internally_consistent(); let IndexScheduler { - autobatching_enabled, cleanup_enabled: _, - must_stop_processing: _, processing_tasks, - file_store, env, - all_tasks, - all_batches, - batch_to_tasks_mapping, - // task reverse index - status, - kind, - index_tasks, - canceled_by, - enqueued_at, - started_at, - finished_at, - - // batch reverse index - batch_status, - batch_kind, - batch_index_tasks, - batch_enqueued_at, - batch_started_at, - batch_finished_at, + queue, + scheduler, index_mapper, features: _, - max_number_of_tasks: _, - max_number_of_batched_tasks: _, - wake_up: _, - dumps_path: _, - snapshots_path: _, - auth_path: _, - version_file_path: _, webhook_url: _, webhook_authorization_header: _, test_breakpoint_sdr: _, @@ -66,7 +39,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { let mut snap = String::new(); let processing = processing_tasks.read().unwrap().clone(); - snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n")); + snap.push_str(&format!("### Autobatching Enabled = {}\n", scheduler.autobatching_enabled)); snap.push_str(&format!( "### Processing batch {:?}:\n", processing.batch.as_ref().map(|batch| batch.uid) @@ -79,19 +52,19 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { snap.push_str("\n----------------------------------------------------------------------\n"); snap.push_str("### All Tasks:\n"); - snap.push_str(&snapshot_all_tasks(&rtxn, *all_tasks)); + snap.push_str(&snapshot_all_tasks(&rtxn, queue.tasks.all_tasks)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Status:\n"); - snap.push_str(&snapshot_status(&rtxn, *status)); + snap.push_str(&snapshot_status(&rtxn, queue.tasks.status)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### 
Kind:\n"); - snap.push_str(&snapshot_kind(&rtxn, *kind)); + snap.push_str(&snapshot_kind(&rtxn, queue.tasks.kind)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Index Tasks:\n"); - snap.push_str(&snapshot_index_tasks(&rtxn, *index_tasks)); + snap.push_str(&snapshot_index_tasks(&rtxn, queue.tasks.index_tasks)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Index Mapper:\n"); @@ -99,55 +72,55 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { snap.push_str("\n----------------------------------------------------------------------\n"); snap.push_str("### Canceled By:\n"); - snap.push_str(&snapshot_canceled_by(&rtxn, *canceled_by)); + snap.push_str(&snapshot_canceled_by(&rtxn, queue.tasks.canceled_by)); snap.push_str("\n----------------------------------------------------------------------\n"); snap.push_str("### Enqueued At:\n"); - snap.push_str(&snapshot_date_db(&rtxn, *enqueued_at)); + snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.enqueued_at)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Started At:\n"); - snap.push_str(&snapshot_date_db(&rtxn, *started_at)); + snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.started_at)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Finished At:\n"); - snap.push_str(&snapshot_date_db(&rtxn, *finished_at)); + snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.finished_at)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### All Batches:\n"); - snap.push_str(&snapshot_all_batches(&rtxn, *all_batches)); + snap.push_str(&snapshot_all_batches(&rtxn, queue.batches.all_batches)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Batch 
to tasks mapping:\n"); - snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, *batch_to_tasks_mapping)); + snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, queue.batch_to_tasks_mapping)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Batches Status:\n"); - snap.push_str(&snapshot_status(&rtxn, *batch_status)); + snap.push_str(&snapshot_status(&rtxn, queue.batches.status)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Batches Kind:\n"); - snap.push_str(&snapshot_kind(&rtxn, *batch_kind)); + snap.push_str(&snapshot_kind(&rtxn, queue.batches.kind)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Batches Index Tasks:\n"); - snap.push_str(&snapshot_index_tasks(&rtxn, *batch_index_tasks)); + snap.push_str(&snapshot_index_tasks(&rtxn, queue.batches.index_tasks)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Batches Enqueued At:\n"); - snap.push_str(&snapshot_date_db(&rtxn, *batch_enqueued_at)); + snap.push_str(&snapshot_date_db(&rtxn, queue.batches.enqueued_at)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Batches Started At:\n"); - snap.push_str(&snapshot_date_db(&rtxn, *batch_started_at)); + snap.push_str(&snapshot_date_db(&rtxn, queue.batches.started_at)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Batches Finished At:\n"); - snap.push_str(&snapshot_date_db(&rtxn, *batch_finished_at)); + snap.push_str(&snapshot_date_db(&rtxn, queue.batches.finished_at)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### File Store:\n"); - snap.push_str(&snapshot_file_store(file_store)); + 
snap.push_str(&snapshot_file_store(&queue.file_store)); snap.push_str("\n----------------------------------------------------------------------\n"); snap diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index 8bceaddf6..3c50283d9 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -1,7 +1,7 @@ /*! This crate defines the index scheduler, which is responsible for: 1. Keeping references to meilisearch's indexes and mapping them to their -user-defined names. + user-defined names. 2. Scheduling tasks given by the user and executing them, in batch if possible. When an `IndexScheduler` is created, a new thread containing a reference to the @@ -18,8 +18,7 @@ called asynchronously from any thread. These methods can either query the content of the scheduler or enqueue new tasks. */ -mod autobatcher; -mod batch; +mod dump; pub mod error; mod features; mod index_mapper; @@ -27,6 +26,10 @@ mod index_mapper; mod insta_snapshot; mod lru; mod processing; +mod queue; +mod scheduler; +#[cfg(test)] +mod test_utils; mod utils; pub mod uuid_codec; @@ -35,190 +38,39 @@ pub type TaskId = u32; use std::collections::{BTreeMap, HashMap}; use std::io::{self, BufReader, Read}; -use std::ops::{Bound, RangeBounds}; use std::panic::{catch_unwind, AssertUnwindSafe}; use std::path::{Path, PathBuf}; -use std::sync::atomic::Ordering::{self, Relaxed}; -use std::sync::atomic::{AtomicBool, AtomicU32}; use std::sync::{Arc, RwLock}; use std::time::Duration; -use dump::{KindDump, TaskDump, UpdateFile}; +use dump::Dump; pub use error::Error; pub use features::RoFeatures; -use file_store::FileStore; use flate2::bufread::GzEncoder; use flate2::Compression; -use meilisearch_types::batches::{Batch, BatchId}; -use meilisearch_types::error::ResponseError; +use meilisearch_types::batches::Batch; use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures}; use meilisearch_types::heed::byteorder::BE; -use 
meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128}; -use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn}; -use meilisearch_types::milli::documents::DocumentsBatchBuilder; +use meilisearch_types::heed::types::I128; +use meilisearch_types::heed::{self, Env, RoTxn}; use meilisearch_types::milli::index::IndexEmbeddingConfig; use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs}; -use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; +use meilisearch_types::milli::{self, Index}; use meilisearch_types::task_view::TaskView; -use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; +use meilisearch_types::tasks::{KindWithContent, Task}; use processing::ProcessingTasks; -use rayon::current_num_threads; -use rayon::prelude::{IntoParallelIterator, ParallelIterator}; +pub use queue::Query; +use queue::Queue; use roaring::RoaringBitmap; -use synchronoise::SignalEvent; -use time::format_description::well_known::Rfc3339; +use scheduler::Scheduler; use time::OffsetDateTime; -use utils::{filter_out_references_to_newer_tasks, keep_ids_within_datetimes, map_bound}; -use uuid::Uuid; use crate::index_mapper::IndexMapper; -use crate::processing::{AtomicTaskStep, BatchProgress}; -use crate::utils::{check_index_swap_validity, clamp_to_page_size}; +use crate::utils::clamp_to_page_size; pub(crate) type BEI128 = I128; -/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`]. -/// -/// An empty/default query (where each field is set to `None`) matches all tasks. -/// Each non-null field restricts the set of tasks further. -#[derive(Default, Debug, Clone, PartialEq, Eq)] -pub struct Query { - /// The maximum number of tasks to be matched - pub limit: Option, - /// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched - pub from: Option, - /// The order used to return the tasks. 
By default the newest tasks are returned first and the boolean is `false`. - pub reverse: Option, - /// The [task ids](`meilisearch_types::tasks::Task::uid`) to be matched - pub uids: Option>, - /// The [batch ids](`meilisearch_types::batches::Batch::uid`) to be matched - pub batch_uids: Option>, - /// The allowed [statuses](`meilisearch_types::tasks::Task::status`) of the matched tasls - pub statuses: Option>, - /// The allowed [kinds](meilisearch_types::tasks::Kind) of the matched tasks. - /// - /// The kind of a task is given by: - /// ``` - /// # use meilisearch_types::tasks::{Task, Kind}; - /// # fn doc_func(task: Task) -> Kind { - /// task.kind.as_kind() - /// # } - /// ``` - pub types: Option>, - /// The allowed [index ids](meilisearch_types::tasks::Task::index_uid) of the matched tasks - pub index_uids: Option>, - /// The [task ids](`meilisearch_types::tasks::Task::uid`) of the [`TaskCancelation`](meilisearch_types::tasks::Task::Kind::TaskCancelation) tasks - /// that canceled the matched tasks. - pub canceled_by: Option>, - /// Exclusive upper bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field. - pub before_enqueued_at: Option, - /// Exclusive lower bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field. - pub after_enqueued_at: Option, - /// Exclusive upper bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field. - pub before_started_at: Option, - /// Exclusive lower bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field. - pub after_started_at: Option, - /// Exclusive upper bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field. - pub before_finished_at: Option, - /// Exclusive lower bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field. 
- pub after_finished_at: Option, -} - -impl Query { - /// Return `true` if every field of the query is set to `None`, such that the query - /// matches all tasks. - pub fn is_empty(&self) -> bool { - matches!( - self, - Query { - limit: None, - from: None, - reverse: None, - uids: None, - batch_uids: None, - statuses: None, - types: None, - index_uids: None, - canceled_by: None, - before_enqueued_at: None, - after_enqueued_at: None, - before_started_at: None, - after_started_at: None, - before_finished_at: None, - after_finished_at: None, - } - ) - } - - /// Add an [index id](meilisearch_types::tasks::Task::index_uid) to the list of permitted indexes. - pub fn with_index(self, index_uid: String) -> Self { - let mut index_vec = self.index_uids.unwrap_or_default(); - index_vec.push(index_uid); - Self { index_uids: Some(index_vec), ..self } - } - - // Removes the `from` and `limit` restrictions from the query. - // Useful to get the total number of tasks matching a filter. - pub fn without_limits(self) -> Self { - Query { limit: None, from: None, ..self } - } -} - -#[derive(Default, Clone, Debug)] -struct MustStopProcessing(Arc); - -impl MustStopProcessing { - fn get(&self) -> bool { - self.0.load(Relaxed) - } - - fn must_stop(&self) { - self.0.store(true, Relaxed); - } - - fn reset(&self) { - self.0.store(false, Relaxed); - } -} - -/// Database const names for the `IndexScheduler`. 
-mod db_name { - pub const ALL_TASKS: &str = "all-tasks"; - pub const ALL_BATCHES: &str = "all-batches"; - pub const BATCH_TO_TASKS_MAPPING: &str = "batch-to-tasks-mapping"; - pub const STATUS: &str = "status"; - pub const KIND: &str = "kind"; - pub const INDEX_TASKS: &str = "index-tasks"; - pub const CANCELED_BY: &str = "canceled_by"; - pub const ENQUEUED_AT: &str = "enqueued-at"; - pub const STARTED_AT: &str = "started-at"; - pub const FINISHED_AT: &str = "finished-at"; - - pub const BATCH_STATUS: &str = "batch-status"; - pub const BATCH_KIND: &str = "batch-kind"; - pub const BATCH_INDEX_TASKS: &str = "batch-index-tasks"; - pub const BATCH_ENQUEUED_AT: &str = "batch-enqueued-at"; - pub const BATCH_STARTED_AT: &str = "batch-started-at"; - pub const BATCH_FINISHED_AT: &str = "batch-finished-at"; -} - -#[cfg(test)] -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Breakpoint { - // this state is only encountered while creating the scheduler in the test suite. - Init, - - Start, - BatchCreated, - BeforeProcessing, - AfterProcessing, - AbortedIndexation, - ProcessBatchSucceeded, - ProcessBatchFailed, - InsideProcessBatch, -} - #[derive(Debug)] pub struct IndexSchedulerOptions { /// The path to the version file of Meilisearch. @@ -250,7 +102,7 @@ pub struct IndexSchedulerOptions { /// The number of indexes that can be concurrently opened in memory. pub index_count: usize, /// Configuration used during indexing for each meilisearch index. - pub indexer_config: IndexerConfig, + pub indexer_config: Arc, /// Set to `true` iff the index scheduler is allowed to automatically /// batch tasks together, to process multiple tasks at once. pub autobatching_enabled: bool, @@ -263,6 +115,9 @@ pub struct IndexSchedulerOptions { /// If the autobatcher is allowed to automatically batch tasks /// it will only batch this defined number of tasks at once. 
pub max_number_of_batched_tasks: usize, + /// If the autobatcher is allowed to automatically batch tasks + /// it will only batch this defined maximum size (in bytes) of tasks at once. + pub batched_tasks_size_limit: u64, /// The experimental features enabled for this instance. pub instance_features: InstanceTogglableFeatures, } @@ -273,52 +128,13 @@ pub struct IndexScheduler { /// The LMDB environment which the DBs are associated with. pub(crate) env: Env, - /// A boolean that can be set to true to stop the currently processing tasks. - pub(crate) must_stop_processing: MustStopProcessing, - /// The list of tasks currently processing pub(crate) processing_tasks: Arc>, - /// The list of files referenced by the tasks - pub(crate) file_store: FileStore, + /// The queue containing both the tasks and the batches. + pub queue: queue::Queue, - /// The main database, it contains all the tasks accessible by their Id. - pub(crate) all_tasks: Database>, - - /// Contains all the batches accessible by their Id. - pub(crate) all_batches: Database>, - - /// Matches a batch id with the associated task ids. - pub(crate) batch_to_tasks_mapping: Database, - - /// All the tasks ids grouped by their status. - // TODO we should not be able to serialize a `Status::Processing` in this database. - pub(crate) status: Database, RoaringBitmapCodec>, - /// All the tasks ids grouped by their kind. - pub(crate) kind: Database, RoaringBitmapCodec>, - /// Store the tasks associated to an index. 
- pub(crate) index_tasks: Database, - /// Store the tasks that were canceled by a task uid - pub(crate) canceled_by: Database, - /// Store the task ids of tasks which were enqueued at a specific date - pub(crate) enqueued_at: Database, - /// Store the task ids of finished tasks which started being processed at a specific date - pub(crate) started_at: Database, - /// Store the task ids of tasks which finished at a specific date - pub(crate) finished_at: Database, - - /// All the batches containing a task matching the selected status. - pub(crate) batch_status: Database, RoaringBitmapCodec>, - /// All the batches ids grouped by the kind of their task. - pub(crate) batch_kind: Database, RoaringBitmapCodec>, - /// Store the batches associated to an index. - pub(crate) batch_index_tasks: Database, - /// Store the batches containing tasks which were enqueued at a specific date - pub(crate) batch_enqueued_at: Database, - /// Store the batches containing finished tasks started at a specific date - pub(crate) batch_started_at: Database, - /// Store the batches containing tasks finished at a specific date - pub(crate) batch_finished_at: Database, + pub scheduler: scheduler::Scheduler, /// In charge of creating, opening, storing and returning indexes. pub(crate) index_mapper: IndexMapper, @@ -326,39 +142,14 @@ pub struct IndexScheduler { /// In charge of fetching and setting the status of experimental features. features: features::FeatureData, - /// Get a signal when a batch needs to be processed. - pub(crate) wake_up: Arc, - - /// Whether auto-batching is enabled or not. - pub(crate) autobatching_enabled: bool, - /// Whether we should automatically cleanup the task queue or not. pub(crate) cleanup_enabled: bool, - /// The max number of tasks allowed before the scheduler starts to delete - /// the finished tasks automatically. - pub(crate) max_number_of_tasks: usize, - - /// The maximum number of tasks that will be batched together. 
- pub(crate) max_number_of_batched_tasks: usize, - /// The webhook url we should send tasks to after processing every batches. pub(crate) webhook_url: Option, /// The Authorization header to send to the webhook URL. pub(crate) webhook_authorization_header: Option, - /// The path used to create the dumps. - pub(crate) dumps_path: PathBuf, - - /// The path used to create the snapshots. - pub(crate) snapshots_path: PathBuf, - - /// The path to the folder containing the auth LMDB env. - pub(crate) auth_path: PathBuf, - - /// The path to the version file of Meilisearch. - pub(crate) version_file_path: PathBuf, - embedders: Arc>>>, // ================= test @@ -367,13 +158,13 @@ pub struct IndexScheduler { /// /// See [self.breakpoint()](`IndexScheduler::breakpoint`) for an explanation. #[cfg(test)] - test_breakpoint_sdr: crossbeam_channel::Sender<(Breakpoint, bool)>, + test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>, /// A list of planned failures within the [`tick`](IndexScheduler::tick) method of the index scheduler. /// /// The first field is the iteration index and the second field identifies a location in the code. 
#[cfg(test)] - planned_failures: Vec<(usize, tests::FailureLocation)>, + planned_failures: Vec<(usize, test_utils::FailureLocation)>, /// A counter that is incremented before every call to [`tick`](IndexScheduler::tick) #[cfg(test)] @@ -384,40 +175,12 @@ impl IndexScheduler { fn private_clone(&self) -> IndexScheduler { IndexScheduler { env: self.env.clone(), - must_stop_processing: self.must_stop_processing.clone(), processing_tasks: self.processing_tasks.clone(), - file_store: self.file_store.clone(), - all_tasks: self.all_tasks, - all_batches: self.all_batches, - batch_to_tasks_mapping: self.batch_to_tasks_mapping, - - // Tasks reverse index - status: self.status, - kind: self.kind, - index_tasks: self.index_tasks, - canceled_by: self.canceled_by, - enqueued_at: self.enqueued_at, - started_at: self.started_at, - finished_at: self.finished_at, - - // Batches reverse index - batch_status: self.batch_status, - batch_kind: self.batch_kind, - batch_index_tasks: self.batch_index_tasks, - batch_enqueued_at: self.batch_enqueued_at, - batch_started_at: self.batch_started_at, - batch_finished_at: self.batch_finished_at, + queue: self.queue.private_clone(), + scheduler: self.scheduler.private_clone(), index_mapper: self.index_mapper.clone(), - wake_up: self.wake_up.clone(), - autobatching_enabled: self.autobatching_enabled, cleanup_enabled: self.cleanup_enabled, - max_number_of_tasks: self.max_number_of_tasks, - max_number_of_batched_tasks: self.max_number_of_batched_tasks, - snapshots_path: self.snapshots_path.clone(), - dumps_path: self.dumps_path.clone(), - auth_path: self.auth_path.clone(), - version_file_path: self.version_file_path.clone(), webhook_url: self.webhook_url.clone(), webhook_authorization_header: self.webhook_authorization_header.clone(), embedders: self.embedders.clone(), @@ -430,14 +193,13 @@ impl IndexScheduler { features: self.features.clone(), } } -} -impl IndexScheduler { /// Create an index scheduler and start its run loop. 
+ #[allow(private_interfaces)] // because test_utils is private pub fn new( options: IndexSchedulerOptions, - #[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(Breakpoint, bool)>, - #[cfg(test)] planned_failures: Vec<(usize, tests::FailureLocation)>, + #[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>, + #[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>, ) -> Result { std::fs::create_dir_all(&options.tasks_path)?; std::fs::create_dir_all(&options.update_file_path)?; @@ -469,80 +231,25 @@ impl IndexScheduler { heed::EnvOpenOptions::new() .max_dbs(19) .map_size(budget.task_db_size) - .open(options.tasks_path) + .open(&options.tasks_path) }?; let features = features::FeatureData::new(&env, options.instance_features)?; - let file_store = FileStore::new(&options.update_file_path)?; - let mut wtxn = env.write_txn()?; - let all_tasks = env.create_database(&mut wtxn, Some(db_name::ALL_TASKS))?; - let all_batches = env.create_database(&mut wtxn, Some(db_name::ALL_BATCHES))?; - let batch_to_tasks_mapping = - env.create_database(&mut wtxn, Some(db_name::BATCH_TO_TASKS_MAPPING))?; - - let status = env.create_database(&mut wtxn, Some(db_name::STATUS))?; - let kind = env.create_database(&mut wtxn, Some(db_name::KIND))?; - let index_tasks = env.create_database(&mut wtxn, Some(db_name::INDEX_TASKS))?; - let canceled_by = env.create_database(&mut wtxn, Some(db_name::CANCELED_BY))?; - let enqueued_at = env.create_database(&mut wtxn, Some(db_name::ENQUEUED_AT))?; - let started_at = env.create_database(&mut wtxn, Some(db_name::STARTED_AT))?; - let finished_at = env.create_database(&mut wtxn, Some(db_name::FINISHED_AT))?; - - let batch_status = env.create_database(&mut wtxn, Some(db_name::BATCH_STATUS))?; - let batch_kind = env.create_database(&mut wtxn, Some(db_name::BATCH_KIND))?; - let batch_index_tasks = env.create_database(&mut wtxn, Some(db_name::BATCH_INDEX_TASKS))?; - let batch_enqueued_at = 
env.create_database(&mut wtxn, Some(db_name::BATCH_ENQUEUED_AT))?; - let batch_started_at = env.create_database(&mut wtxn, Some(db_name::BATCH_STARTED_AT))?; - let batch_finished_at = env.create_database(&mut wtxn, Some(db_name::BATCH_FINISHED_AT))?; + let queue = Queue::new(&env, &mut wtxn, &options)?; + let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?; wtxn.commit()?; // allow unreachable_code to get rids of the warning in the case of a test build. let this = Self { - must_stop_processing: MustStopProcessing::default(), processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())), - file_store, - all_tasks, - all_batches, - batch_to_tasks_mapping, - // Task reverse indexes - status, - kind, - index_tasks, - canceled_by, - enqueued_at, - started_at, - finished_at, + queue, + scheduler: Scheduler::new(&options), - // Batch reverse indexes - batch_status, - batch_kind, - batch_index_tasks, - batch_enqueued_at, - batch_started_at, - batch_finished_at, - - index_mapper: IndexMapper::new( - &env, - options.indexes_path, - budget.map_size, - options.index_growth_amount, - budget.index_count, - options.enable_mdb_writemap, - options.indexer_config, - )?, + index_mapper, env, - // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things - wake_up: Arc::new(SignalEvent::auto(true)), - autobatching_enabled: options.autobatching_enabled, cleanup_enabled: options.cleanup_enabled, - max_number_of_tasks: options.max_number_of_tasks, - max_number_of_batched_tasks: options.max_number_of_batched_tasks, - dumps_path: options.dumps_path, - snapshots_path: options.snapshots_path, - auth_path: options.auth_path, - version_file_path: options.version_file_path, webhook_url: options.webhook_url, webhook_authorization_header: options.webhook_authorization_header, embedders: Default::default(), @@ -563,7 +270,7 @@ impl IndexScheduler { /// Return `Ok(())` if the index scheduler is able to access one of its database. 
pub fn health(&self) -> Result<()> { let rtxn = self.env.read_txn()?; - self.all_tasks.first(&rtxn)?; + self.queue.batch_to_tasks_mapping.first(&rtxn)?; Ok(()) } @@ -650,15 +357,15 @@ impl IndexScheduler { .name(String::from("scheduler")) .spawn(move || { #[cfg(test)] - run.breakpoint(Breakpoint::Init); + run.breakpoint(test_utils::Breakpoint::Init); - run.wake_up.wait_timeout(std::time::Duration::from_secs(60)); + run.scheduler.wake_up.wait_timeout(std::time::Duration::from_secs(60)); loop { let ret = catch_unwind(AssertUnwindSafe(|| run.tick())); match ret { Ok(Ok(TickOutcome::TickAgain(_))) => (), - Ok(Ok(TickOutcome::WaitForSignal)) => run.wake_up.wait(), + Ok(Ok(TickOutcome::WaitForSignal)) => run.scheduler.wake_up.wait(), Ok(Err(e)) => { tracing::error!("{e}"); // Wait one second when an irrecoverable error occurs. @@ -704,14 +411,14 @@ impl IndexScheduler { /// If you need to fetch information from or perform an action on all indexes, /// see the `try_for_each_index` function. pub fn index(&self, name: &str) -> Result { - let rtxn = self.env.read_txn()?; - self.index_mapper.index(&rtxn, name) + self.index_mapper.index(&self.env.read_txn()?, name) } + /// Return the boolean referring if index exists. pub fn index_exists(&self, name: &str) -> Result { - let rtxn = self.env.read_txn()?; - self.index_mapper.index_exists(&rtxn, name) + self.index_mapper.index_exists(&self.env.read_txn()?, name) } + /// Return the name of all indexes without opening them. pub fn index_names(&self) -> Result> { let rtxn = self.env.read_txn()?; @@ -736,391 +443,6 @@ impl IndexScheduler { self.index_mapper.try_for_each_index(&rtxn, f) } - /// Return the task ids matched by the given query from the index scheduler's point of view. 
- pub(crate) fn get_task_ids(&self, rtxn: &RoTxn, query: &Query) -> Result { - let ProcessingTasks { batch: processing_batch, processing: processing_tasks, progress: _ } = - self.processing_tasks.read().unwrap().clone(); - let Query { - limit, - from, - reverse, - uids, - batch_uids, - statuses, - types, - index_uids, - canceled_by, - before_enqueued_at, - after_enqueued_at, - before_started_at, - after_started_at, - before_finished_at, - after_finished_at, - } = query; - - let mut tasks = self.all_task_ids(rtxn)?; - - if let Some(from) = from { - let range = if reverse.unwrap_or_default() { - u32::MIN..*from - } else { - from.saturating_add(1)..u32::MAX - }; - tasks.remove_range(range); - } - - if let Some(batch_uids) = batch_uids { - let mut batch_tasks = RoaringBitmap::new(); - for batch_uid in batch_uids { - if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) { - batch_tasks |= &*processing_tasks; - } else { - batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?; - } - } - tasks &= batch_tasks; - } - - if let Some(status) = statuses { - let mut status_tasks = RoaringBitmap::new(); - for status in status { - match status { - // special case for Processing tasks - Status::Processing => { - status_tasks |= &*processing_tasks; - } - status => status_tasks |= &self.get_status(rtxn, *status)?, - }; - } - if !status.contains(&Status::Processing) { - tasks -= &*processing_tasks; - } - tasks &= status_tasks; - } - - if let Some(uids) = uids { - let uids = RoaringBitmap::from_iter(uids); - tasks &= &uids; - } - - if let Some(canceled_by) = canceled_by { - let mut all_canceled_tasks = RoaringBitmap::new(); - for cancel_task_uid in canceled_by { - if let Some(canceled_by_uid) = self.canceled_by.get(rtxn, cancel_task_uid)? { - all_canceled_tasks |= canceled_by_uid; - } - } - - // if the canceled_by has been specified but no task - // matches then we prefer matching zero than all tasks. 
- if all_canceled_tasks.is_empty() { - return Ok(RoaringBitmap::new()); - } else { - tasks &= all_canceled_tasks; - } - } - - if let Some(kind) = types { - let mut kind_tasks = RoaringBitmap::new(); - for kind in kind { - kind_tasks |= self.get_kind(rtxn, *kind)?; - } - tasks &= &kind_tasks; - } - - if let Some(index) = index_uids { - let mut index_tasks = RoaringBitmap::new(); - for index in index { - index_tasks |= self.index_tasks(rtxn, index)?; - } - tasks &= &index_tasks; - } - - // For the started_at filter, we need to treat the part of the tasks that are processing from the part of the - // tasks that are not processing. The non-processing ones are filtered normally while the processing ones - // are entirely removed unless the in-memory startedAt variable falls within the date filter. - // Once we have filtered the two subsets, we put them back together and assign it back to `tasks`. - tasks = { - let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) = - (&tasks - &*processing_tasks, &tasks & &*processing_tasks); - - // special case for Processing tasks - // A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds - let clear_filtered_processing_tasks = - |start: Bound, end: Bound| { - let start = map_bound(start, |b| b.unix_timestamp_nanos()); - let end = map_bound(end, |b| b.unix_timestamp_nanos()); - let is_within_dates = RangeBounds::contains( - &(start, end), - &processing_batch - .map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at) - .unix_timestamp_nanos(), - ); - if !is_within_dates { - filtered_processing_tasks.clear(); - } - }; - match (after_started_at, before_started_at) { - (None, None) => (), - (None, Some(before)) => { - clear_filtered_processing_tasks(Bound::Unbounded, Bound::Excluded(*before)) - } - (Some(after), None) => { - clear_filtered_processing_tasks(Bound::Excluded(*after), Bound::Unbounded) - } - (Some(after), Some(before)) => 
clear_filtered_processing_tasks( - Bound::Excluded(*after), - Bound::Excluded(*before), - ), - }; - - keep_ids_within_datetimes( - rtxn, - &mut filtered_non_processing_tasks, - self.started_at, - *after_started_at, - *before_started_at, - )?; - filtered_non_processing_tasks | filtered_processing_tasks - }; - - keep_ids_within_datetimes( - rtxn, - &mut tasks, - self.enqueued_at, - *after_enqueued_at, - *before_enqueued_at, - )?; - - keep_ids_within_datetimes( - rtxn, - &mut tasks, - self.finished_at, - *after_finished_at, - *before_finished_at, - )?; - - if let Some(limit) = limit { - tasks = if query.reverse.unwrap_or_default() { - tasks.into_iter().take(*limit as usize).collect() - } else { - tasks.into_iter().rev().take(*limit as usize).collect() - }; - } - - Ok(tasks) - } - - /// Return the batch ids matched by the given query from the index scheduler's point of view. - pub(crate) fn get_batch_ids( - &self, - rtxn: &RoTxn, - processing: &ProcessingTasks, - query: &Query, - ) -> Result { - let Query { - limit, - from, - reverse, - uids, - batch_uids, - statuses, - types, - index_uids, - canceled_by, - before_enqueued_at, - after_enqueued_at, - before_started_at, - after_started_at, - before_finished_at, - after_finished_at, - } = query; - - let mut batches = self.all_batch_ids(rtxn)?; - if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) { - batches.insert(batch_id); - } - - if let Some(from) = from { - let range = if reverse.unwrap_or_default() { - u32::MIN..*from - } else { - from.saturating_add(1)..u32::MAX - }; - batches.remove_range(range); - } - - if let Some(batch_uids) = &batch_uids { - let batches_uids = RoaringBitmap::from_iter(batch_uids); - batches &= batches_uids; - } - - if let Some(status) = &statuses { - let mut status_batches = RoaringBitmap::new(); - for status in status { - match status { - // special case for Processing batches - Status::Processing => { - if let Some(batch_id) = processing.batch.as_ref().map(|batch| 
batch.uid) { - status_batches.insert(batch_id); - } - } - // Enqueued tasks are not stored in batches - Status::Enqueued => (), - status => status_batches |= &self.get_batch_status(rtxn, *status)?, - }; - } - if !status.contains(&Status::Processing) { - if let Some(ref batch) = processing.batch { - batches.remove(batch.uid); - } - } - batches &= status_batches; - } - - if let Some(task_uids) = &uids { - let mut batches_by_task_uids = RoaringBitmap::new(); - for task_uid in task_uids { - if let Some(task) = self.get_task(rtxn, *task_uid)? { - if let Some(batch_uid) = task.batch_uid { - batches_by_task_uids.insert(batch_uid); - } - } - } - batches &= batches_by_task_uids; - } - - // There is no database for this query, we must retrieve the task queried by the client and ensure it's valid - if let Some(canceled_by) = &canceled_by { - let mut all_canceled_batches = RoaringBitmap::new(); - for cancel_uid in canceled_by { - if let Some(task) = self.get_task(rtxn, *cancel_uid)? { - if task.kind.as_kind() == Kind::TaskCancelation - && task.status == Status::Succeeded - { - if let Some(batch_uid) = task.batch_uid { - all_canceled_batches.insert(batch_uid); - } - } - } - } - - // if the canceled_by has been specified but no batch - // matches then we prefer matching zero than all batches. 
- if all_canceled_batches.is_empty() { - return Ok(RoaringBitmap::new()); - } else { - batches &= all_canceled_batches; - } - } - - if let Some(kind) = &types { - let mut kind_batches = RoaringBitmap::new(); - for kind in kind { - kind_batches |= self.get_batch_kind(rtxn, *kind)?; - if let Some(uid) = processing - .batch - .as_ref() - .and_then(|batch| batch.kinds.contains(kind).then_some(batch.uid)) - { - kind_batches.insert(uid); - } - } - batches &= &kind_batches; - } - - if let Some(index) = &index_uids { - let mut index_batches = RoaringBitmap::new(); - for index in index { - index_batches |= self.index_batches(rtxn, index)?; - if let Some(uid) = processing - .batch - .as_ref() - .and_then(|batch| batch.indexes.contains(index).then_some(batch.uid)) - { - index_batches.insert(uid); - } - } - batches &= &index_batches; - } - - // For the started_at filter, we need to treat the part of the batches that are processing from the part of the - // batches that are not processing. The non-processing ones are filtered normally while the processing ones - // are entirely removed unless the in-memory startedAt variable falls within the date filter. - // Once we have filtered the two subsets, we put them back together and assign it back to `batches`. 
- batches = { - let (mut filtered_non_processing_batches, mut filtered_processing_batches) = - (&batches - &*processing.processing, &batches & &*processing.processing); - - // special case for Processing batches - // A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds - let mut clear_filtered_processing_batches = - |start: Bound, end: Bound| { - let start = map_bound(start, |b| b.unix_timestamp_nanos()); - let end = map_bound(end, |b| b.unix_timestamp_nanos()); - let is_within_dates = RangeBounds::contains( - &(start, end), - &processing - .batch - .as_ref() - .map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at) - .unix_timestamp_nanos(), - ); - if !is_within_dates { - filtered_processing_batches.clear(); - } - }; - match (after_started_at, before_started_at) { - (None, None) => (), - (None, Some(before)) => { - clear_filtered_processing_batches(Bound::Unbounded, Bound::Excluded(*before)) - } - (Some(after), None) => { - clear_filtered_processing_batches(Bound::Excluded(*after), Bound::Unbounded) - } - (Some(after), Some(before)) => clear_filtered_processing_batches( - Bound::Excluded(*after), - Bound::Excluded(*before), - ), - }; - - keep_ids_within_datetimes( - rtxn, - &mut filtered_non_processing_batches, - self.batch_started_at, - *after_started_at, - *before_started_at, - )?; - filtered_non_processing_batches | filtered_processing_batches - }; - - keep_ids_within_datetimes( - rtxn, - &mut batches, - self.batch_enqueued_at, - *after_enqueued_at, - *before_enqueued_at, - )?; - - keep_ids_within_datetimes( - rtxn, - &mut batches, - self.batch_finished_at, - *after_finished_at, - *before_finished_at, - )?; - - if let Some(limit) = limit { - batches = if query.reverse.unwrap_or_default() { - batches.into_iter().take(*limit as usize).collect() - } else { - batches.into_iter().rev().take(*limit as usize).collect() - }; - } - - Ok(batches) - } - /// Returns the total number of indexes available for 
the specified filter. /// And a `Vec` of the index_uid + its stats pub fn get_paginated_indexes_stats( @@ -1169,40 +491,7 @@ impl IndexScheduler { /// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example. /// 3. The number of times the properties appeared. pub fn get_stats(&self) -> Result>> { - let rtxn = self.read_txn()?; - - let mut res = BTreeMap::new(); - - let processing_tasks = { self.processing_tasks.read().unwrap().processing.len() }; - - res.insert( - "statuses".to_string(), - enum_iterator::all::() - .map(|s| { - let tasks = self.get_status(&rtxn, s)?.len(); - match s { - Status::Enqueued => Ok((s.to_string(), tasks - processing_tasks)), - Status::Processing => Ok((s.to_string(), processing_tasks)), - s => Ok((s.to_string(), tasks)), - } - }) - .collect::>>()?, - ); - res.insert( - "types".to_string(), - enum_iterator::all::() - .map(|s| Ok((s.to_string(), self.get_kind(&rtxn, s)?.len()))) - .collect::>>()?, - ); - res.insert( - "indexes".to_string(), - self.index_tasks - .iter(&rtxn)? - .map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?)) - .collect::>>()?, - ); - - Ok(res) + self.queue.get_stats(&self.read_txn()?, &self.processing_tasks.read().unwrap()) } // Return true if there is at least one task that is processing. 
@@ -1215,131 +504,11 @@ impl IndexScheduler { pub fn is_index_processing(&self, index: &str) -> Result { let rtxn = self.env.read_txn()?; let processing_tasks = self.processing_tasks.read().unwrap().processing.clone(); - let index_tasks = self.index_tasks(&rtxn, index)?; + let index_tasks = self.queue.tasks.index_tasks(&rtxn, index)?; let nbr_index_processing_tasks = processing_tasks.intersection_len(&index_tasks); Ok(nbr_index_processing_tasks > 0) } - /// Return the task ids matching the query along with the total number of tasks - /// by ignoring the from and limit parameters from the user's point of view. - /// - /// There are two differences between an internal query and a query executed by - /// the user. - /// - /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated - /// with many indexes internally. - /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. - pub fn get_task_ids_from_authorized_indexes( - &self, - rtxn: &RoTxn, - query: &Query, - filters: &meilisearch_auth::AuthFilter, - ) -> Result<(RoaringBitmap, u64)> { - // compute all tasks matching the filter by ignoring the limits, to find the number of tasks matching - // the filter. - // As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares - // us from modifying the underlying implementation, and the performance remains sufficient. - // Should this change, we would modify `get_task_ids` to directly return the number of matching tasks. - let total_tasks = self.get_task_ids(rtxn, &query.clone().without_limits())?; - let mut tasks = self.get_task_ids(rtxn, query)?; - - // If the query contains a list of index uid or there is a finite list of authorized indexes, - // then we must exclude all the kinds that aren't associated to one and only one index. 
- if query.index_uids.is_some() || !filters.all_indexes_authorized() { - for kind in enum_iterator::all::().filter(|kind| !kind.related_to_one_index()) { - tasks -= self.get_kind(rtxn, kind)?; - } - } - - // Any task that is internally associated with a non-authorized index - // must be discarded. - if !filters.all_indexes_authorized() { - let all_indexes_iter = self.index_tasks.iter(rtxn)?; - for result in all_indexes_iter { - let (index, index_tasks) = result?; - if !filters.is_index_authorized(index) { - tasks -= index_tasks; - } - } - } - - Ok((tasks, total_tasks.len())) - } - - /// Return the batch ids matching the query along with the total number of batches - /// by ignoring the from and limit parameters from the user's point of view. - /// - /// There are two differences between an internal query and a query executed by - /// the user. - /// - /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated - /// with many indexes internally. - /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. - fn get_batch_ids_from_authorized_indexes( - &self, - rtxn: &RoTxn, - processing: &ProcessingTasks, - query: &Query, - filters: &meilisearch_auth::AuthFilter, - ) -> Result<(RoaringBitmap, u64)> { - // compute all batches matching the filter by ignoring the limits, to find the number of batches matching - // the filter. - // As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares - // us from modifying the underlying implementation, and the performance remains sufficient. - // Should this change, we would modify `get_batch_ids` to directly return the number of matching batches. 
- let total_batches = - self.get_batch_ids(rtxn, processing, &query.clone().without_limits())?; - let mut batches = self.get_batch_ids(rtxn, processing, query)?; - - // If the query contains a list of index uid or there is a finite list of authorized indexes, - // then we must exclude all the batches that only contains tasks associated to multiple indexes. - // This works because we don't autobatch tasks associated to multiple indexes with tasks associated - // to a single index. e.g: IndexSwap cannot be batched with IndexCreation. - if query.index_uids.is_some() || !filters.all_indexes_authorized() { - for kind in enum_iterator::all::().filter(|kind| !kind.related_to_one_index()) { - batches -= self.get_kind(rtxn, kind)?; - if let Some(batch) = processing.batch.as_ref() { - if batch.kinds.contains(&kind) { - batches.remove(batch.uid); - } - } - } - } - - // Any batch that is internally associated with at least one authorized index - // must be returned. - if !filters.all_indexes_authorized() { - let mut valid_indexes = RoaringBitmap::new(); - let mut forbidden_indexes = RoaringBitmap::new(); - - let all_indexes_iter = self.batch_index_tasks.iter(rtxn)?; - for result in all_indexes_iter { - let (index, index_tasks) = result?; - if filters.is_index_authorized(index) { - valid_indexes |= index_tasks; - } else { - forbidden_indexes |= index_tasks; - } - } - if let Some(batch) = processing.batch.as_ref() { - for index in &batch.indexes { - if filters.is_index_authorized(index) { - valid_indexes.insert(batch.uid); - } else { - forbidden_indexes.insert(batch.uid); - } - } - } - - // If a batch had ONE valid task then it should be returned - let invalid_batches = forbidden_indexes - valid_indexes; - - batches -= invalid_batches; - } - - Ok((batches, total_batches.len())) - } - /// Return the tasks matching the query from the user's point of view along /// with the total number of tasks matching the query, ignoring from and limit. 
/// @@ -1347,53 +516,35 @@ impl IndexScheduler { /// the user. /// /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated - /// with many indexes internally. + /// with many indexes internally. /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. pub fn get_tasks_from_authorized_indexes( &self, - query: Query, + query: &Query, filters: &meilisearch_auth::AuthFilter, ) -> Result<(Vec, u64)> { - let rtxn = self.env.read_txn()?; + let rtxn = self.read_txn()?; + let processing = self.processing_tasks.read().unwrap(); + self.queue.get_tasks_from_authorized_indexes(&rtxn, query, filters, &processing) + } - let (tasks, total) = self.get_task_ids_from_authorized_indexes(&rtxn, &query, filters)?; - let tasks = if query.reverse.unwrap_or_default() { - Box::new(tasks.into_iter()) as Box> - } else { - Box::new(tasks.into_iter().rev()) as Box> - }; - let tasks = - self.get_existing_tasks(&rtxn, tasks.take(query.limit.unwrap_or(u32::MAX) as usize))?; - - let ProcessingTasks { batch, processing, progress } = - self.processing_tasks.read().map_err(|_| Error::CorruptedTaskQueue)?.clone(); - - // ignored for now, might be added to batch details later - let _ = progress; - - let ret = tasks.into_iter(); - if processing.is_empty() || batch.is_none() { - Ok((ret.collect(), total)) - } else { - // Safe because we ensured there was a batch in the previous branch - let batch = batch.unwrap(); - Ok(( - ret.map(|task| { - if processing.contains(task.uid) { - Task { - status: Status::Processing, - batch_uid: Some(batch.uid), - started_at: Some(batch.started_at), - ..task - } - } else { - task - } - }) - .collect(), - total, - )) - } + /// Return the task ids matching the query along with the total number of tasks + /// by ignoring the from and limit parameters from the user's point of view. + /// + /// There are two differences between an internal query and a query executed by + /// the user. + /// + /// 1. 
IndexSwap tasks are not publicly associated with any index, but they are associated + /// with many indexes internally. + /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. + pub fn get_task_ids_from_authorized_indexes( + &self, + query: &Query, + filters: &meilisearch_auth::AuthFilter, + ) -> Result<(RoaringBitmap, u64)> { + let rtxn = self.read_txn()?; + let processing = self.processing_tasks.read().unwrap(); + self.queue.get_task_ids_from_authorized_indexes(&rtxn, query, filters, &processing) } /// Return the batches matching the query from the user's point of view along @@ -1403,31 +554,35 @@ impl IndexScheduler { /// the user. /// /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated - /// with many indexes internally. + /// with many indexes internally. /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. pub fn get_batches_from_authorized_indexes( &self, - query: Query, + query: &Query, filters: &meilisearch_auth::AuthFilter, ) -> Result<(Vec, u64)> { - let rtxn = self.env.read_txn()?; - let processing = self.processing_tasks.read().unwrap().clone(); + let rtxn = self.read_txn()?; + let processing = self.processing_tasks.read().unwrap(); + self.queue.get_batches_from_authorized_indexes(&rtxn, query, filters, &processing) + } - let (batches, total) = - self.get_batch_ids_from_authorized_indexes(&rtxn, &processing, &query, filters)?; - let batches = if query.reverse.unwrap_or_default() { - Box::new(batches.into_iter()) as Box> - } else { - Box::new(batches.into_iter().rev()) as Box> - }; - - let batches = self.get_existing_batches( - &rtxn, - &processing, - batches.take(query.limit.unwrap_or(u32::MAX) as usize), - )?; - - Ok((batches, total)) + /// Return the batch ids matching the query along with the total number of batches + /// by ignoring the from and limit parameters from the user's point of view. 
+ /// + /// There are two differences between an internal query and a query executed by + /// the user. + /// + /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated + /// with many indexes internally. + /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. + pub fn get_batch_ids_from_authorized_indexes( + &self, + query: &Query, + filters: &meilisearch_auth::AuthFilter, + ) -> Result<(RoaringBitmap, u64)> { + let rtxn = self.read_txn()?; + let processing = self.processing_tasks.read().unwrap(); + self.queue.get_batch_ids_from_authorized_indexes(&rtxn, query, filters, &processing) } /// Register a new task in the scheduler. @@ -1439,8 +594,6 @@ impl IndexScheduler { task_id: Option, dry_run: bool, ) -> Result { - let mut wtxn = self.env.write_txn()?; - // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty()) && (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 40 @@ -1448,64 +601,8 @@ impl IndexScheduler { return Err(Error::NoSpaceLeftInTaskQueue); } - let next_task_id = self.next_task_id(&wtxn)?; - - if let Some(uid) = task_id { - if uid < next_task_id { - return Err(Error::BadTaskId { received: uid, expected: next_task_id }); - } - } - - let mut task = Task { - uid: task_id.unwrap_or(next_task_id), - // The batch is defined once we starts processing the task - batch_uid: None, - enqueued_at: OffsetDateTime::now_utc(), - started_at: None, - finished_at: None, - error: None, - canceled_by: None, - details: kind.default_details(), - status: Status::Enqueued, - kind: kind.clone(), - }; - // For deletion and cancelation tasks, we want to make extra sure that they - // don't attempt to delete/cancel tasks that are newer than themselves. 
- filter_out_references_to_newer_tasks(&mut task); - // If the register task is an index swap task, verify that it is well-formed - // (that it does not contain duplicate indexes). - check_index_swap_validity(&task)?; - - // At this point the task is going to be registered and no further checks will be done - if dry_run { - return Ok(task); - } - - // Get rid of the mutability. - let task = task; - - self.all_tasks.put_with_flags(&mut wtxn, PutFlags::APPEND, &task.uid, &task)?; - - for index in task.indexes() { - self.update_index(&mut wtxn, index, |bitmap| { - bitmap.insert(task.uid); - })?; - } - - self.update_status(&mut wtxn, Status::Enqueued, |bitmap| { - bitmap.insert(task.uid); - })?; - - self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| { - bitmap.insert(task.uid); - })?; - - utils::insert_task_datetime(&mut wtxn, self.enqueued_at, task.enqueued_at, task.uid)?; - - if let Err(e) = wtxn.commit() { - self.delete_persisted_task_data(&task)?; - return Err(e.into()); - } + let mut wtxn = self.env.write_txn()?; + let task = self.queue.register(&mut wtxn, &kind, task_id, dry_run)?; // If the registered task is a task cancelation // we inform the processing tasks to stop (if necessary). @@ -1513,13 +610,17 @@ impl IndexScheduler { let tasks_to_cancel = RoaringBitmap::from_iter(tasks); if self.processing_tasks.read().unwrap().must_cancel_processing_tasks(&tasks_to_cancel) { - self.must_stop_processing.must_stop(); + self.scheduler.must_stop_processing.must_stop(); } } - // notify the scheduler loop to execute a new tick - self.wake_up.signal(); + if let Err(e) = wtxn.commit() { + self.queue.delete_persisted_task_data(&task)?; + return Err(e.into()); + } + // notify the scheduler loop to execute a new tick + self.scheduler.wake_up.signal(); Ok(task) } @@ -1553,263 +654,6 @@ impl IndexScheduler { Ok(()) } - /// Create a file and register it in the index scheduler. - /// - /// The returned file and uuid can be used to associate - /// some data to a task. 
The file will be kept until - /// the task has been fully processed. - pub fn create_update_file(&self, dry_run: bool) -> Result<(Uuid, file_store::File)> { - if dry_run { - Ok((Uuid::nil(), file_store::File::dry_file()?)) - } else { - Ok(self.file_store.new_update()?) - } - } - - #[cfg(test)] - pub fn create_update_file_with_uuid(&self, uuid: u128) -> Result<(Uuid, file_store::File)> { - Ok(self.file_store.new_update_with_uuid(uuid)?) - } - - /// The size on disk taken by all the updates files contained in the `IndexScheduler`, in bytes. - pub fn compute_update_file_size(&self) -> Result { - Ok(self.file_store.compute_total_size()?) - } - - /// Delete a file from the index scheduler. - /// - /// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method. - pub fn delete_update_file(&self, uuid: Uuid) -> Result<()> { - Ok(self.file_store.delete(uuid)?) - } - - /// Perform one iteration of the run loop. - /// - /// 1. See if we need to cleanup the task queue - /// 2. Find the next batch of tasks to be processed. - /// 3. Update the information of these tasks following the start of their processing. - /// 4. Update the in-memory list of processed tasks accordingly. - /// 5. Process the batch: - /// - perform the actions of each batched task - /// - update the information of each batched task following the end - /// of their processing. - /// 6. Reset the in-memory list of processed tasks. - /// - /// Returns the number of processed tasks. - fn tick(&self) -> Result { - #[cfg(test)] - { - *self.run_loop_iteration.write().unwrap() += 1; - self.breakpoint(Breakpoint::Start); - } - - if self.cleanup_enabled { - self.cleanup_task_queue()?; - } - - let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; - let (batch, mut processing_batch) = - match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? 
{ - Some(batch) => batch, - None => return Ok(TickOutcome::WaitForSignal), - }; - let index_uid = batch.index_uid().map(ToOwned::to_owned); - drop(rtxn); - - // 1. store the starting date with the bitmap of processing tasks. - let mut ids = batch.ids(); - let processed_tasks = ids.len(); - - // We reset the must_stop flag to be sure that we don't stop processing tasks - self.must_stop_processing.reset(); - let progress = self - .processing_tasks - .write() - .unwrap() - // We can clone the processing batch here because we don't want its modification to affect the view of the processing batches - .start_processing(processing_batch.clone(), ids.clone()); - - #[cfg(test)] - self.breakpoint(Breakpoint::BatchCreated); - - // 2. Process the tasks - let res = { - let cloned_index_scheduler = self.private_clone(); - let processing_batch = &mut processing_batch; - let progress = progress.clone(); - std::thread::scope(|s| { - let handle = std::thread::Builder::new() - .name(String::from("batch-operation")) - .spawn_scoped(s, move || { - cloned_index_scheduler.process_batch(batch, processing_batch, progress) - }) - .unwrap(); - handle.join().unwrap_or(Err(Error::ProcessBatchPanicked)) - }) - }; - - // Reset the currently updating index to relinquish the index handle - self.index_mapper.set_currently_updating_index(None); - - #[cfg(test)] - self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?; - - progress.update_progress(BatchProgress::WritingTasksToDisk); - processing_batch.finished(); - let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; - let mut canceled = RoaringBitmap::new(); - - match res { - Ok(tasks) => { - #[cfg(test)] - self.breakpoint(Breakpoint::ProcessBatchSucceeded); - - let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32); - progress.update_progress(task_progress_obj); - let mut success = 0; - let mut failure = 0; - let mut canceled_by = None; - - #[allow(unused_variables)] - for (i, mut task) in 
tasks.into_iter().enumerate() { - task_progress.fetch_add(1, Ordering::Relaxed); - processing_batch.update(&mut task); - if task.status == Status::Canceled { - canceled.insert(task.uid); - canceled_by = task.canceled_by; - } - - #[cfg(test)] - self.maybe_fail( - tests::FailureLocation::UpdatingTaskAfterProcessBatchSuccess { - task_uid: i as u32, - }, - )?; - - match task.error { - Some(_) => failure += 1, - None => success += 1, - } - - self.update_task(&mut wtxn, &task) - .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; - } - if let Some(canceled_by) = canceled_by { - self.canceled_by.put(&mut wtxn, &canceled_by, &canceled)?; - } - tracing::info!("A batch of tasks was successfully completed with {success} successful tasks and {failure} failed tasks."); - } - // If we have an abortion error we must stop the tick here and re-schedule tasks. - Err(Error::Milli { - error: milli::Error::InternalError(milli::InternalError::AbortedIndexation), - .. - }) - | Err(Error::AbortedTask) => { - #[cfg(test)] - self.breakpoint(Breakpoint::AbortedIndexation); - wtxn.abort(); - - tracing::info!("A batch of tasks was aborted."); - // We make sure that we don't call `stop_processing` on the `processing_tasks`, - // this is because we want to let the next tick call `create_next_batch` and keep - // the `started_at` date times and `processings` of the current processing tasks. - // This date time is used by the task cancelation to store the right `started_at` - // date in the task on disk. - return Ok(TickOutcome::TickAgain(0)); - } - // If an index said it was full, we need to: - // 1. identify which index is full - // 2. close the associated environment - // 3. resize it - // 4. re-schedule tasks - Err(Error::Milli { - error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached), - .. - }) if index_uid.is_some() => { - // fixme: add index_uid to match to avoid the unwrap - let index_uid = index_uid.unwrap(); - // fixme: handle error more gracefully? 
not sure when this could happen - self.index_mapper.resize_index(&wtxn, &index_uid)?; - wtxn.abort(); - - tracing::info!("The max database size was reached. Resizing the index."); - - return Ok(TickOutcome::TickAgain(0)); - } - // In case of a failure we must get back and patch all the tasks with the error. - Err(err) => { - #[cfg(test)] - self.breakpoint(Breakpoint::ProcessBatchFailed); - let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32); - progress.update_progress(task_progress_obj); - - let error: ResponseError = err.into(); - for id in ids.iter() { - task_progress.fetch_add(1, Ordering::Relaxed); - let mut task = self - .get_task(&wtxn, id) - .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))? - .ok_or(Error::CorruptedTaskQueue)?; - task.status = Status::Failed; - task.error = Some(error.clone()); - task.details = task.details.map(|d| d.to_failed()); - processing_batch.update(&mut task); - - #[cfg(test)] - self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?; - - tracing::error!("Batch failed {}", error); - - self.update_task(&mut wtxn, &task) - .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; - } - } - } - - // We must re-add the canceled task so they're part of the same batch. - ids |= canceled; - self.write_batch(&mut wtxn, processing_batch, &ids)?; - - #[cfg(test)] - self.maybe_fail(tests::FailureLocation::CommittingWtxn)?; - - wtxn.commit().map_err(Error::HeedTransaction)?; - - // We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task - // and then become « not found » for some time until the commit everything is written and the final commit is made. 
- self.processing_tasks.write().unwrap().stop_processing(); - - // Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart - tracing::debug!("Deleting the update files"); - - //We take one read transaction **per thread**. Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap - let idx = AtomicU32::new(0); - (0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> { - let rtxn = self.read_txn()?; - while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) { - let task = self - .get_task(&rtxn, id) - .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))? - .ok_or(Error::CorruptedTaskQueue)?; - if let Err(e) = self.delete_persisted_task_data(&task) { - tracing::error!( - "Failure to delete the content files associated with task {}. Error: {e}", - task.uid - ); - } - } - Ok(()) - })?; - - // We shouldn't crash the tick function if we can't send data to the webhook. - let _ = self.notify_webhook(&ids); - - #[cfg(test)] - self.breakpoint(Breakpoint::AfterProcessing); - - Ok(TickOutcome::TickAgain(processed_tasks)) - } - /// Once the tasks changes have been committed we must send all the tasks that were updated to our webhook if there is one. fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> { if let Some(ref url) = self.webhook_url { @@ -1829,6 +673,8 @@ impl IndexScheduler { Some(task_id) => { let task = self .index_scheduler + .queue + .tasks .get_task(self.rtxn, task_id) .map_err(|err| io::Error::new(io::ErrorKind::Other, err))? 
.ok_or_else(|| { @@ -1890,59 +736,6 @@ impl IndexScheduler { Ok(()) } - /// Register a task to cleanup the task queue if needed - fn cleanup_task_queue(&self) -> Result<()> { - let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; - - let nb_tasks = self.all_task_ids(&rtxn)?.len(); - // if we have less than 1M tasks everything is fine - if nb_tasks < self.max_number_of_tasks as u64 { - return Ok(()); - } - - let finished = self.status.get(&rtxn, &Status::Succeeded)?.unwrap_or_default() - | self.status.get(&rtxn, &Status::Failed)?.unwrap_or_default() - | self.status.get(&rtxn, &Status::Canceled)?.unwrap_or_default(); - - let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000)); - - // /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete - // the deletion tasks we enqueued ourselves. - if to_delete.len() < 2 { - tracing::warn!("The task queue is almost full, but no task can be deleted yet."); - // the only thing we can do is hope that the user tasks are going to finish - return Ok(()); - } - - tracing::info!( - "The task queue is almost full. Deleting the oldest {} finished tasks.", - to_delete.len() - ); - - // it's safe to unwrap here because we checked the len above - let newest_task_id = to_delete.iter().last().unwrap(); - let last_task_to_delete = - self.get_task(&rtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?; - drop(rtxn); - - // increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date. 
- let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1); - - self.register( - KindWithContent::TaskDeletion { - query: format!( - "?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled", - delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?, - ), - tasks: to_delete, - }, - None, - false, - )?; - - Ok(()) - } - pub fn index_stats(&self, index_uid: &str) -> Result { let is_indexing = self.is_index_processing(index_uid)?; let rtxn = self.read_txn()?; @@ -1961,13 +754,6 @@ impl IndexScheduler { Ok(()) } - pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> { - match task.content_uuid() { - Some(content_file) => self.delete_update_file(content_file), - None => Ok(()), - } - } - // TODO: consider using a type alias or a struct embedder/template pub fn embedders( &self, @@ -2017,223 +803,6 @@ impl IndexScheduler { .collect(); res.map(EmbeddingConfigs::new) } - - /// Blocks the thread until the test handle asks to progress to/through this breakpoint. - /// - /// Two messages are sent through the channel for each breakpoint. - /// The first message is `(b, false)` and the second message is `(b, true)`. - /// - /// Since the channel has a capacity of zero, the `send` and `recv` calls wait for each other. - /// So when the index scheduler calls `test_breakpoint_sdr.send(b, false)`, it blocks - /// the thread until the test catches up by calling `test_breakpoint_rcv.recv()` enough. - /// From the test side, we call `recv()` repeatedly until we find the message `(breakpoint, false)`. - /// As soon as we find it, the index scheduler is unblocked but then wait again on the call to - /// `test_breakpoint_sdr.send(b, true)`. This message will only be able to send once the - /// test asks to progress to the next `(b2, false)`. - #[cfg(test)] - fn breakpoint(&self, b: Breakpoint) { - // We send two messages. The first one will sync with the call - // to `handle.wait_until(b)`. 
The second one will block until the - // the next call to `handle.wait_until(..)`. - self.test_breakpoint_sdr.send((b, false)).unwrap(); - // This one will only be able to be sent if the test handle stays alive. - // If it fails, then it means that we have exited the test. - // By crashing with `unwrap`, we kill the run loop. - self.test_breakpoint_sdr.send((b, true)).unwrap(); - } -} - -pub struct Dump<'a> { - index_scheduler: &'a IndexScheduler, - wtxn: RwTxn<'a>, - - indexes: HashMap, - statuses: HashMap, - kinds: HashMap, -} - -impl<'a> Dump<'a> { - pub(crate) fn new(index_scheduler: &'a mut IndexScheduler) -> Result { - // While loading a dump no one should be able to access the scheduler thus I can block everything. - let wtxn = index_scheduler.env.write_txn()?; - - Ok(Dump { - index_scheduler, - wtxn, - indexes: HashMap::new(), - statuses: HashMap::new(), - kinds: HashMap::new(), - }) - } - - /// Register a new task coming from a dump in the scheduler. - /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running. - pub fn register_dumped_task( - &mut self, - task: TaskDump, - content_file: Option>, - ) -> Result { - let content_uuid = match content_file { - Some(content_file) if task.status == Status::Enqueued => { - let (uuid, mut file) = self.index_scheduler.create_update_file(false)?; - let mut builder = DocumentsBatchBuilder::new(&mut file); - for doc in content_file { - builder.append_json_object(&doc?)?; - } - builder.into_inner()?; - file.persist()?; - - Some(uuid) - } - // If the task isn't `Enqueued` then just generate a recognisable `Uuid` - // in case we try to open it later. 
- _ if task.status != Status::Enqueued => Some(Uuid::nil()), - _ => None, - }; - - let task = Task { - uid: task.uid, - batch_uid: task.batch_uid, - enqueued_at: task.enqueued_at, - started_at: task.started_at, - finished_at: task.finished_at, - error: task.error, - canceled_by: task.canceled_by, - details: task.details, - status: task.status, - kind: match task.kind { - KindDump::DocumentImport { - primary_key, - method, - documents_count, - allow_index_creation, - } => KindWithContent::DocumentAdditionOrUpdate { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - primary_key, - method, - content_file: content_uuid.ok_or(Error::CorruptedDump)?, - documents_count, - allow_index_creation, - }, - KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion { - documents_ids, - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - }, - KindDump::DocumentDeletionByFilter { filter } => { - KindWithContent::DocumentDeletionByFilter { - filter_expr: filter, - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - } - } - KindDump::DocumentEdition { filter, context, function } => { - KindWithContent::DocumentEdition { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - filter_expr: filter, - context, - function, - } - } - KindDump::DocumentClear => KindWithContent::DocumentClear { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - }, - KindDump::Settings { settings, is_deletion, allow_index_creation } => { - KindWithContent::SettingsUpdate { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - new_settings: settings, - is_deletion, - allow_index_creation, - } - } - KindDump::IndexDeletion => KindWithContent::IndexDeletion { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - }, - KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - primary_key, - }, - KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate { - 
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - primary_key, - }, - KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps }, - KindDump::TaskCancelation { query, tasks } => { - KindWithContent::TaskCancelation { query, tasks } - } - KindDump::TasksDeletion { query, tasks } => { - KindWithContent::TaskDeletion { query, tasks } - } - KindDump::DumpCreation { keys, instance_uid } => { - KindWithContent::DumpCreation { keys, instance_uid } - } - KindDump::SnapshotCreation => KindWithContent::SnapshotCreation, - }, - }; - - self.index_scheduler.all_tasks.put(&mut self.wtxn, &task.uid, &task)?; - - for index in task.indexes() { - match self.indexes.get_mut(index) { - Some(bitmap) => { - bitmap.insert(task.uid); - } - None => { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(task.uid); - self.indexes.insert(index.to_string(), bitmap); - } - }; - } - - utils::insert_task_datetime( - &mut self.wtxn, - self.index_scheduler.enqueued_at, - task.enqueued_at, - task.uid, - )?; - - // we can't override the started_at & finished_at, so we must only set it if the tasks is finished and won't change - if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) { - if let Some(started_at) = task.started_at { - utils::insert_task_datetime( - &mut self.wtxn, - self.index_scheduler.started_at, - started_at, - task.uid, - )?; - } - if let Some(finished_at) = task.finished_at { - utils::insert_task_datetime( - &mut self.wtxn, - self.index_scheduler.finished_at, - finished_at, - task.uid, - )?; - } - } - - self.statuses.entry(task.status).or_default().insert(task.uid); - self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid); - - Ok(task) - } - - /// Commit all the changes and exit the importing dump state - pub fn finish(mut self) -> Result<()> { - for (index, bitmap) in self.indexes { - self.index_scheduler.index_tasks.put(&mut self.wtxn, &index, &bitmap)?; - } - for (status, bitmap) in self.statuses { - 
self.index_scheduler.put_status(&mut self.wtxn, status, &bitmap)?; - } - for (kind, bitmap) in self.kinds { - self.index_scheduler.put_kind(&mut self.wtxn, kind, &bitmap)?; - } - - self.wtxn.commit()?; - self.index_scheduler.wake_up.signal(); - - Ok(()) - } } /// The outcome of calling the [`IndexScheduler::tick`] function. @@ -2266,4685 +835,3 @@ pub struct IndexStats { /// Internal stats computed from the index. pub inner_stats: index_mapper::IndexStats, } - -#[cfg(test)] -mod tests { - use std::io::{BufWriter, Write}; - use std::time::Instant; - - use big_s::S; - use crossbeam_channel::RecvTimeoutError; - use file_store::File; - use insta::assert_json_snapshot; - use maplit::btreeset; - use meili_snap::{json_string, snapshot}; - use meilisearch_auth::AuthFilter; - use meilisearch_types::document_formats::DocumentFormatError; - use meilisearch_types::error::ErrorCode; - use meilisearch_types::index_uid_pattern::IndexUidPattern; - use meilisearch_types::milli::obkv_to_json; - use meilisearch_types::milli::update::IndexDocumentsMethod::{ - ReplaceDocuments, UpdateDocuments, - }; - use meilisearch_types::milli::update::Setting; - use meilisearch_types::milli::vector::settings::EmbeddingSettings; - use meilisearch_types::settings::Unchecked; - use meilisearch_types::tasks::IndexSwap; - use meilisearch_types::VERSION_FILE_NAME; - use tempfile::{NamedTempFile, TempDir}; - use time::Duration; - use uuid::Uuid; - use Breakpoint::*; - - use super::*; - use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler}; - - #[derive(Debug, Clone, Copy, PartialEq, Eq)] - pub enum FailureLocation { - InsideCreateBatch, - InsideProcessBatch, - PanicInsideProcessBatch, - AcquiringWtxn, - UpdatingTaskAfterProcessBatchSuccess { task_uid: u32 }, - UpdatingTaskAfterProcessBatchFailure, - CommittingWtxn, - } - - impl IndexScheduler { - pub fn test( - autobatching_enabled: bool, - planned_failures: Vec<(usize, FailureLocation)>, - ) -> (Self, IndexSchedulerHandle) { - 
Self::test_with_custom_config(planned_failures, |config| { - config.autobatching_enabled = autobatching_enabled; - }) - } - - pub fn test_with_custom_config( - planned_failures: Vec<(usize, FailureLocation)>, - configuration: impl Fn(&mut IndexSchedulerOptions), - ) -> (Self, IndexSchedulerHandle) { - let tempdir = TempDir::new().unwrap(); - let (sender, receiver) = crossbeam_channel::bounded(0); - - let indexer_config = IndexerConfig { skip_index_budget: true, ..Default::default() }; - - let mut options = IndexSchedulerOptions { - version_file_path: tempdir.path().join(VERSION_FILE_NAME), - auth_path: tempdir.path().join("auth"), - tasks_path: tempdir.path().join("db_path"), - update_file_path: tempdir.path().join("file_store"), - indexes_path: tempdir.path().join("indexes"), - snapshots_path: tempdir.path().join("snapshots"), - dumps_path: tempdir.path().join("dumps"), - webhook_url: None, - webhook_authorization_header: None, - task_db_size: 1000 * 1000 * 10, // 10 MB, we don't use MiB on purpose. - index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. - enable_mdb_writemap: false, - index_growth_amount: 1000 * 1000 * 1000 * 1000, // 1 TB - index_count: 5, - indexer_config, - autobatching_enabled: true, - cleanup_enabled: true, - max_number_of_tasks: 1_000_000, - max_number_of_batched_tasks: usize::MAX, - instance_features: Default::default(), - }; - configuration(&mut options); - - let index_scheduler = Self::new(options, sender, planned_failures).unwrap(); - - // To be 100% consistent between all test we're going to start the scheduler right now - // and ensure it's in the expected starting state. 
- let breakpoint = match receiver.recv_timeout(std::time::Duration::from_secs(10)) { - Ok(b) => b, - Err(RecvTimeoutError::Timeout) => { - panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.") - } - Err(RecvTimeoutError::Disconnected) => panic!("The scheduler crashed."), - }; - assert_eq!(breakpoint, (Init, false)); - let index_scheduler_handle = IndexSchedulerHandle { - _tempdir: tempdir, - index_scheduler: index_scheduler.private_clone(), - test_breakpoint_rcv: receiver, - last_breakpoint: breakpoint.0, - }; - - (index_scheduler, index_scheduler_handle) - } - - /// Return a [`PlannedFailure`](Error::PlannedFailure) error if a failure is planned - /// for the given location and current run loop iteration. - pub fn maybe_fail(&self, location: FailureLocation) -> Result<()> { - if self.planned_failures.contains(&(*self.run_loop_iteration.read().unwrap(), location)) - { - match location { - FailureLocation::PanicInsideProcessBatch => { - panic!("simulated panic") - } - _ => Err(Error::PlannedFailure), - } - } else { - Ok(()) - } - } - } - - /// Return a `KindWithContent::IndexCreation` task - fn index_creation_task(index: &'static str, primary_key: &'static str) -> KindWithContent { - KindWithContent::IndexCreation { index_uid: S(index), primary_key: Some(S(primary_key)) } - } - /// Create a `KindWithContent::DocumentImport` task that imports documents. 
- /// - /// - `index_uid` is given as parameter - /// - `primary_key` is given as parameter - /// - `method` is set to `ReplaceDocuments` - /// - `content_file` is given as parameter - /// - `documents_count` is given as parameter - /// - `allow_index_creation` is set to `true` - fn replace_document_import_task( - index: &'static str, - primary_key: Option<&'static str>, - content_file_uuid: u128, - documents_count: u64, - ) -> KindWithContent { - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S(index), - primary_key: primary_key.map(ToOwned::to_owned), - method: ReplaceDocuments, - content_file: Uuid::from_u128(content_file_uuid), - documents_count, - allow_index_creation: true, - } - } - - /// Adapting to the new json reading interface - pub fn read_json( - bytes: &[u8], - write: impl Write, - ) -> std::result::Result { - let temp_file = NamedTempFile::new().unwrap(); - let mut buffer = BufWriter::new(temp_file.reopen().unwrap()); - buffer.write_all(bytes).unwrap(); - buffer.flush().unwrap(); - meilisearch_types::document_formats::read_json(temp_file.as_file(), write) - } - - /// Create an update file with the given file uuid. - /// - /// The update file contains just one simple document whose id is given by `document_id`. - /// - /// The uuid of the file and its documents count is returned. - fn sample_documents( - index_scheduler: &IndexScheduler, - file_uuid: u128, - document_id: usize, - ) -> (File, u64) { - let content = format!( - r#" - {{ - "id" : "{document_id}" - }}"# - ); - - let (_uuid, mut file) = index_scheduler.create_update_file_with_uuid(file_uuid).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - (file, documents_count) - } - - pub struct IndexSchedulerHandle { - _tempdir: TempDir, - index_scheduler: IndexScheduler, - test_breakpoint_rcv: crossbeam_channel::Receiver<(Breakpoint, bool)>, - last_breakpoint: Breakpoint, - } - - impl IndexSchedulerHandle { - /// Advance the scheduler to the next tick. 
- /// Panic - /// * If the scheduler is waiting for a task to be registered. - /// * If the breakpoint queue is in a bad state. - #[track_caller] - fn advance(&mut self) -> Breakpoint { - let (breakpoint_1, b) = match self - .test_breakpoint_rcv - .recv_timeout(std::time::Duration::from_secs(50)) - { - Ok(b) => b, - Err(RecvTimeoutError::Timeout) => { - let state = snapshot_index_scheduler(&self.index_scheduler); - panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}") - } - Err(RecvTimeoutError::Disconnected) => { - let state = snapshot_index_scheduler(&self.index_scheduler); - panic!("The scheduler crashed.\n{state}") - } - }; - // if we've already encountered a breakpoint we're supposed to be stuck on the false - // and we expect the same variant with the true to come now. - assert_eq!( - (breakpoint_1, b), - (self.last_breakpoint, true), - "Internal error in the test suite. In the previous iteration I got `({:?}, false)` and now I got `({:?}, {:?})`.", - self.last_breakpoint, - breakpoint_1, - b, - ); - - let (breakpoint_2, b) = match self - .test_breakpoint_rcv - .recv_timeout(std::time::Duration::from_secs(50)) - { - Ok(b) => b, - Err(RecvTimeoutError::Timeout) => { - let state = snapshot_index_scheduler(&self.index_scheduler); - panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}") - } - Err(RecvTimeoutError::Disconnected) => { - let state = snapshot_index_scheduler(&self.index_scheduler); - panic!("The scheduler crashed.\n{state}") - } - }; - assert!(!b, "Found the breakpoint handle in a bad state. Check your test suite"); - - self.last_breakpoint = breakpoint_2; - - breakpoint_2 - } - - /// Advance the scheduler until all the provided breakpoints are reached in order. 
- #[track_caller] - fn advance_till(&mut self, breakpoints: impl IntoIterator) { - for breakpoint in breakpoints { - let b = self.advance(); - assert_eq!( - b, - breakpoint, - "Was expecting the breakpoint `{:?}` but instead got `{:?}`.\n{}", - breakpoint, - b, - snapshot_index_scheduler(&self.index_scheduler) - ); - } - } - - /// Wait for `n` successful batches. - #[track_caller] - fn advance_n_successful_batches(&mut self, n: usize) { - for _ in 0..n { - self.advance_one_successful_batch(); - } - } - - /// Wait for `n` failed batches. - #[track_caller] - fn advance_n_failed_batches(&mut self, n: usize) { - for _ in 0..n { - self.advance_one_failed_batch(); - } - } - - // Wait for one successful batch. - #[track_caller] - fn advance_one_successful_batch(&mut self) { - self.advance_till([Start, BatchCreated]); - loop { - match self.advance() { - // the process_batch function can call itself recursively, thus we need to - // accept as may InsideProcessBatch as possible before moving to the next state. - InsideProcessBatch => (), - // the batch went successfully, we can stop the loop and go on with the next states. - ProcessBatchSucceeded => break, - AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)), - ProcessBatchFailed => { - while self.advance() != Start {} - panic!("The batch failed.\n{}", snapshot_index_scheduler(&self.index_scheduler)) - }, - breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), - } - } - - self.advance_till([AfterProcessing]); - } - - // Wait for one failed batch. - #[track_caller] - fn advance_one_failed_batch(&mut self) { - self.advance_till([Start, BatchCreated]); - loop { - match self.advance() { - // the process_batch function can call itself recursively, thus we need to - // accept as may InsideProcessBatch as possible before moving to the next state. 
- InsideProcessBatch => (), - // the batch went failed, we can stop the loop and go on with the next states. - ProcessBatchFailed => break, - ProcessBatchSucceeded => panic!("The batch succeeded. (and it wasn't supposed to sorry)\n{}", snapshot_index_scheduler(&self.index_scheduler)), - AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)), - breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), - } - } - self.advance_till([AfterProcessing]); - } - } - - #[test] - fn register() { - // In this test, the handle doesn't make any progress, we only check that the tasks are registered - let (index_scheduler, mut _handle) = IndexScheduler::test(true, vec![]); - - let kinds = [ - index_creation_task("catto", "mouse"), - replace_document_import_task("catto", None, 0, 12), - replace_document_import_task("catto", None, 1, 50), - replace_document_import_task("doggo", Some("bone"), 2, 5000), - ]; - let (_, file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - file.persist().unwrap(); - let (_, file) = index_scheduler.create_update_file_with_uuid(1).unwrap(); - file.persist().unwrap(); - let (_, file) = index_scheduler.create_update_file_with_uuid(2).unwrap(); - file.persist().unwrap(); - - for (idx, kind) in kinds.into_iter().enumerate() { - let k = kind.as_kind(); - let task = index_scheduler.register(kind, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - - assert_eq!(task.uid, idx as u32); - assert_eq!(task.status, Status::Enqueued); - assert_eq!(task.kind.as_kind(), k); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "everything_is_successfully_registered"); - } - - #[test] - fn insert_task_while_another_task_is_processing() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap(); - 
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - handle.advance_till([Start, BatchCreated]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_creation"); - - // while the task is processing can we register another task? - index_scheduler.register(index_creation_task("index_b", "id"), None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - - index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }, None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); - } - - #[test] - fn test_task_is_processing() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_a_task"); - - handle.advance_till([Start, BatchCreated]); - assert!(index_scheduler.is_task_processing().unwrap()); - } - - /// We send a lot of tasks but notify the tasks scheduler only once as - /// we send them very fast, we must make sure that they are all processed. 
- #[test] - fn process_tasks_inserted_without_new_signal() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("cattos"), primary_key: None }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - - index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_second_task"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_third_task"); - } - - #[test] - fn process_tasks_without_autobatching() { - let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - - index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: 
"registered_the_third_task"); - - index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_fourth_task"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "fourth"); - } - - #[test] - fn task_deletion_undeleteable() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); - let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); - file0.persist().unwrap(); - file1.persist().unwrap(); - - let to_enqueue = [ - index_creation_task("catto", "mouse"), - replace_document_import_task("catto", None, 0, documents_count0), - replace_document_import_task("doggo", Some("bone"), 1, documents_count1), - ]; - - for task in to_enqueue { - let _ = index_scheduler.register(task, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - } - - // here we have registered all the tasks, but the index scheduler - // has not progressed at all - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); - - index_scheduler - .register( - KindWithContent::TaskDeletion { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0, 1]), - }, - None, - false, - ) - .unwrap(); - // again, no progress made at all, but one more task is registered - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_enqueued"); - - // now we create the first batch - handle.advance_till([Start, 
BatchCreated]); - - // the task deletion should now be "processing" - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processing"); - - handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); - // after the task deletion is processed, no task should actually have been deleted, - // because the tasks with ids 0 and 1 were still "enqueued", and thus undeleteable - // the "task deletion" task should be marked as "succeeded" and, in its details, the - // number of deleted tasks should be 0 - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_done"); - } - - #[test] - fn task_deletion_deleteable() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); - let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); - file0.persist().unwrap(); - file1.persist().unwrap(); - - let to_enqueue = [ - replace_document_import_task("catto", None, 0, documents_count0), - replace_document_import_task("doggo", Some("bone"), 1, documents_count1), - ]; - - for task in to_enqueue { - let _ = index_scheduler.register(task, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); - - handle.advance_one_successful_batch(); - // first addition of documents should be successful - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed"); - - // Now we delete the first task - index_scheduler - .register( - KindWithContent::TaskDeletion { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_task_deletion"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: 
"task_deletion_processed"); - } - - #[test] - fn task_deletion_delete_same_task_twice() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); - let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); - file0.persist().unwrap(); - file1.persist().unwrap(); - - let to_enqueue = [ - replace_document_import_task("catto", None, 0, documents_count0), - replace_document_import_task("doggo", Some("bone"), 1, documents_count1), - ]; - - for task in to_enqueue { - let _ = index_scheduler.register(task, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); - - handle.advance_one_successful_batch(); - // first addition of documents should be successful - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed"); - - // Now we delete the first task multiple times in a row - for _ in 0..2 { - index_scheduler - .register( - KindWithContent::TaskDeletion { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - handle.advance_one_successful_batch(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed"); - } - - #[test] - fn document_addition() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let content = r#" - { - "id": 1, - "doggo": "bob" - }"#; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - 
allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); - - handle.advance_till([Start, BatchCreated]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_the_batch_creation"); - - handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "once_everything_is_processed"); - } - - #[test] - fn document_addition_and_index_deletion() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let content = r#" - { - "id": 1, - "doggo": "bob" - }"#; - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - - index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); - - handle.advance_one_successful_batch(); // The index creation. - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "before_index_creation"); - handle.advance_one_successful_batch(); // // after the execution of the two tasks in a single batch. 
- snapshot!(snapshot_index_scheduler(&index_scheduler), name: "both_task_succeeded"); - } - - #[test] - fn document_addition_and_document_deletion() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let content = r#"[ - { "id": 1, "doggo": "jean bob" }, - { "id": 2, "catto": "jorts" }, - { "id": 3, "doggo": "bork" } - ]"#; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - index_scheduler - .register( - KindWithContent::DocumentDeletion { - index_uid: S("doggos"), - documents_ids: vec![S("1"), S("2")], - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - - handle.advance_one_successful_batch(); // The addition AND deletion should've been batched together - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_batch"); - - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn document_deletion_and_document_addition() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - index_scheduler - .register( - 
KindWithContent::DocumentDeletion { - index_uid: S("doggos"), - documents_ids: vec![S("1"), S("2")], - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - let content = r#"[ - { "id": 1, "doggo": "jean bob" }, - { "id": 2, "catto": "jorts" }, - { "id": 3, "doggo": "bork" } - ]"#; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - - // The deletion should have failed because it can't create an index - handle.advance_one_failed_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_the_deletion"); - - // The addition should works - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_last_successful_addition"); - - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn fail_in_process_batch_for_document_deletion() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - use meilisearch_types::settings::{Settings, Unchecked}; - let mut new_settings: Box> = Box::default(); - 
new_settings.filterable_attributes = Setting::Set(btreeset!(S("catto"))); - - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings, - is_deletion: false, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - - let content = r#"[ - { "id": 1, "doggo": "jean bob" }, - { "id": 2, "catto": "jorts" }, - { "id": 3, "doggo": "bork" } - ]"#; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_setting_and_document_addition"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_settings"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_documents"); - - index_scheduler - .register( - KindWithContent::DocumentDeletion { - index_uid: S("doggos"), - documents_ids: vec![S("1")], - }, - None, - false, - ) - .unwrap(); - // This one should not be catched by Meilisearch but it's still nice to handle it because if one day we break the filters it could happens - index_scheduler - .register( - KindWithContent::DocumentDeletionByFilter { - index_uid: S("doggos"), - filter_expr: serde_json::json!(true), - }, - None, - false, - ) - .unwrap(); - // Should fail because the ids are not filterable - index_scheduler - .register( - KindWithContent::DocumentDeletionByFilter { - index_uid: S("doggos"), - filter_expr: serde_json::json!("id = 2"), - }, - None, - false, - ) - .unwrap(); - index_scheduler - 
.register( - KindWithContent::DocumentDeletionByFilter { - index_uid: S("doggos"), - filter_expr: serde_json::json!("catto EXISTS"), - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_document_deletions"); - - // Everything should be batched together - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_removing_the_documents"); - - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents_remaining_should_only_be_bork"); - } - - #[test] - fn do_not_batch_task_of_different_indexes() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - let index_names = ["doggos", "cattos", "girafos"]; - - for name in index_names { - index_scheduler - .register( - KindWithContent::IndexCreation { - index_uid: name.to_string(), - primary_key: None, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - - for name in index_names { - index_scheduler - .register( - KindWithContent::DocumentClear { index_uid: name.to_string() }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - - for _ in 0..(index_names.len() * 2) { - handle.advance_one_successful_batch(); - index_scheduler.assert_internally_consistent(); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - } - - #[test] - fn swap_indexes() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let to_enqueue = [ - index_creation_task("a", "id"), - 
index_creation_task("b", "id"), - index_creation_task("c", "id"), - index_creation_task("d", "id"), - ]; - - for task in to_enqueue { - let _ = index_scheduler.register(task, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - } - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_a"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_b"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_c"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_d"); - - index_scheduler - .register( - KindWithContent::IndexSwap { - swaps: vec![ - IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, - IndexSwap { indexes: ("c".to_owned(), "d".to_owned()) }, - ], - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_registered"); - index_scheduler - .register( - KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }], - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "two_swaps_registered"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_processed"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_swap_processed"); - - index_scheduler - .register(KindWithContent::IndexSwap { swaps: vec![] }, None, false) - .unwrap(); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed"); - } - - #[test] - fn swap_indexes_errors() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let to_enqueue = [ - index_creation_task("a", "id"), - 
index_creation_task("b", "id"), - index_creation_task("c", "id"), - index_creation_task("d", "id"), - ]; - - for task in to_enqueue { - let _ = index_scheduler.register(task, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - } - handle.advance_n_successful_batches(4); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_the_index_creation"); - - let first_snap = snapshot_index_scheduler(&index_scheduler); - snapshot!(first_snap, name: "initial_tasks_processed"); - - let err = index_scheduler - .register( - KindWithContent::IndexSwap { - swaps: vec![ - IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, - IndexSwap { indexes: ("b".to_owned(), "a".to_owned()) }, - ], - }, - None, - false, - ) - .unwrap_err(); - snapshot!(format!("{err}"), @"Indexes must be declared only once during a swap. `a`, `b` were specified several times."); - - let second_snap = snapshot_index_scheduler(&index_scheduler); - assert_eq!(first_snap, second_snap); - - // Index `e` does not exist, but we don't check its existence yet - index_scheduler - .register( - KindWithContent::IndexSwap { - swaps: vec![ - IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, - IndexSwap { indexes: ("c".to_owned(), "e".to_owned()) }, - IndexSwap { indexes: ("d".to_owned(), "f".to_owned()) }, - ], - }, - None, - false, - ) - .unwrap(); - handle.advance_one_failed_batch(); - // Now the first swap should have an error message saying `e` and `f` do not exist - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_failed"); - } - - #[test] - fn document_addition_and_index_deletion_on_unexisting_index() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let content = r#" - { - "id": 1, - "doggo": "bob" - }"#; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - 
.register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) - .unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler)); - - handle.advance_n_successful_batches(1); - - snapshot!(snapshot_index_scheduler(&index_scheduler)); - } - - #[test] - fn cancel_enqueued_task() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); - file0.persist().unwrap(); - - let to_enqueue = [ - replace_document_import_task("catto", None, 0, documents_count0), - KindWithContent::TaskCancelation { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }, - ]; - for task in to_enqueue { - let _ = index_scheduler.register(task, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); - } - - #[test] - fn cancel_succeeded_task() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); - file0.persist().unwrap(); - - let _ = index_scheduler - .register(replace_document_import_task("catto", None, 0, documents_count0), None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processed"); - - index_scheduler - .register( - KindWithContent::TaskCancelation { - query: 
"test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }, - None, - false, - ) - .unwrap(); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); - } - - #[test] - fn cancel_processing_task() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); - file0.persist().unwrap(); - - let _ = index_scheduler - .register(replace_document_import_task("catto", None, 0, documents_count0), None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - handle.advance_till([Start, BatchCreated, InsideProcessBatch]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processing"); - - index_scheduler - .register( - KindWithContent::TaskCancelation { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }, - None, - false, - ) - .unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_task_registered"); - // Now we check that we can reach the AbortedIndexation error handling - handle.advance_till([AbortedIndexation]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "aborted_indexation"); - - // handle.advance_till([Start, BatchCreated, BeforeProcessing, AfterProcessing]); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); - } - - #[test] - fn cancel_mix_of_tasks() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); - file0.persist().unwrap(); - let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); - file1.persist().unwrap(); - let (file2, documents_count2) = sample_documents(&index_scheduler, 2, 2); - file2.persist().unwrap(); - - let to_enqueue = [ - 
replace_document_import_task("catto", None, 0, documents_count0), - replace_document_import_task("beavero", None, 1, documents_count1), - replace_document_import_task("wolfo", None, 2, documents_count2), - ]; - for task in to_enqueue { - let _ = index_scheduler.register(task, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - } - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_task_processed"); - - handle.advance_till([Start, BatchCreated, InsideProcessBatch]); - index_scheduler - .register( - KindWithContent::TaskCancelation { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0, 1, 2]), - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processing_second_task_cancel_enqueued"); - - handle.advance_till([AbortedIndexation]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "aborted_indexation"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); - } - - #[test] - fn test_document_replace() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler)); - - // everything should be batched together. 
- handle.advance_n_successful_batches(1); - snapshot!(snapshot_index_scheduler(&index_scheduler)); - - // has everything being pushed successfully in milli? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_update() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler)); - - // everything should be batched together. - handle.advance_n_successful_batches(1); - snapshot!(snapshot_index_scheduler(&index_scheduler)); - - // has everything being pushed successfully in milli? 
- let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_mixed_document_addition() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for i in 0..10 { - let method = if i % 2 == 0 { UpdateDocuments } else { ReplaceDocuments }; - - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // Only half of the task should've been processed since we can't autobatch replace and update together. - handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); - - handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - - // has everything being pushed successfully in milli? 
- let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_settings_update() { - use meilisearch_types::settings::{Settings, Unchecked}; - use milli::update::Setting; - - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let mut new_settings: Box> = Box::default(); - let mut embedders = BTreeMap::default(); - let embedding_settings = milli::vector::settings::EmbeddingSettings { - source: Setting::Set(milli::vector::settings::EmbedderSource::Rest), - api_key: Setting::Set(S("My super secret")), - url: Setting::Set(S("http://localhost:7777")), - dimensions: Setting::Set(4), - request: Setting::Set(serde_json::json!("{{text}}")), - response: Setting::Set(serde_json::json!("{{embedding}}")), - ..Default::default() - }; - embedders.insert(S("default"), Setting::Set(embedding_settings)); - new_settings.embedders = Setting::Set(embedders); - - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings, - is_deletion: false, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task"); - - { - let rtxn = index_scheduler.read_txn().unwrap(); - let task = index_scheduler.get_task(&rtxn, 0).unwrap().unwrap(); - let task = meilisearch_types::task_view::TaskView::from_task(&task); - insta::assert_json_snapshot!(task.details); - } - - handle.advance_n_successful_batches(1); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: 
"settings_update_processed"); - - { - let rtxn = index_scheduler.read_txn().unwrap(); - let task = index_scheduler.get_task(&rtxn, 0).unwrap().unwrap(); - let task = meilisearch_types::task_view::TaskView::from_task(&task); - insta::assert_json_snapshot!(task.details); - } - - // has everything being pushed successfully in milli? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - - let configs = index.embedding_configs(&rtxn).unwrap(); - let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap(); - insta::assert_snapshot!(name, @"default"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); - insta::assert_json_snapshot!(config.embedder_options); - } - - #[test] - fn test_document_replace_without_autobatching() { - let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // Nothing should be batched thus half of the tasks are processed. - handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); - - // Everything is processed. 
- handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - - // has everything being pushed successfully in milli? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_update_without_autobatching() { - let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // Nothing should be batched thus half of the tasks are processed. - handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); - - // Everything is processed. - handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - - // has everything being pushed successfully in milli? 
- let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[macro_export] - macro_rules! debug_snapshot { - ($value:expr, @$snapshot:literal) => {{ - let value = format!("{:?}", $value); - meili_snap::snapshot!(value, @$snapshot); - }}; - } - - #[test] - fn simple_new() { - crate::IndexScheduler::test(true, vec![]); - } - - #[test] - fn query_tasks_from_and_limit() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let kind = index_creation_task("doggo", "bone"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - let kind = index_creation_task("whalo", "plankton"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - let kind = index_creation_task("catto", "his_own_vomit"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); - - handle.advance_n_successful_batches(3); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks"); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - let query = Query { limit: Some(0), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[]"); - - let query = Query { limit: Some(1), ..Default::default() }; - let (tasks, _) = 
index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[2,]"); - - let query = Query { limit: Some(2), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); - - let query = Query { from: Some(1), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); - - let query = Query { from: Some(2), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); - - let query = Query { from: Some(1), limit: Some(1), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[1,]"); - - let query = Query { from: Some(1), limit: Some(2), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); - } - - #[test] - fn query_tasks_simple() { - let start_time = OffsetDateTime::now_utc(); - - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - - 
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); - - handle.advance_till([Start, BatchCreated]); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - - let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[0,]"); // only the processing tasks in the first tick - - let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); // only the enqueued tasks in the first tick - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Processing]), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); // both enqueued and processing tasks in the first tick - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Processing]), - after_started_at: Some(start_time), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // both enqueued and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the test, which should excludes the enqueued tasks - snapshot!(snapshot_bitmap(&tasks), @"[0,]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Processing]), - before_started_at: Some(start_time), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // both enqueued and processing tasks in the first tick, but 
limited to those with a started_at - // that comes before the start of the test, which should excludes all of them - snapshot!(snapshot_bitmap(&tasks), @"[]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Processing]), - after_started_at: Some(start_time), - before_started_at: Some(start_time + Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // both enqueued and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the test and before one minute after the start of the test, - // which should exclude the enqueued tasks and include the only processing task - snapshot!(snapshot_bitmap(&tasks), @"[0,]"); - - handle.advance_till([ - InsideProcessBatch, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - Start, - BatchCreated, - ]); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - - let second_start_time = OffsetDateTime::now_utc(); - - let query = Query { - statuses: Some(vec![Status::Succeeded, Status::Processing]), - after_started_at: Some(start_time), - before_started_at: Some(start_time + Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // both succeeded and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the test and before one minute after the start of the test, - // which should include all tasks - snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); - - let query = Query { - statuses: Some(vec![Status::Succeeded, Status::Processing]), - before_started_at: Some(start_time), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // both 
succeeded and processing tasks in the first tick, but limited to those with a started_at - // that comes before the start of the test, which should exclude all tasks - snapshot!(snapshot_bitmap(&tasks), @"[]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // both succeeded and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the second part of the test and before one minute after the - // second start of the test, which should exclude all tasks - snapshot!(snapshot_bitmap(&tasks), @"[]"); - - // now we make one more batch, the started_at field of the new tasks will be past `second_start_time` - handle.advance_till([ - InsideProcessBatch, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - Start, - BatchCreated, - ]); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // we run the same query to verify that, and indeed find that the last task is matched - snapshot!(snapshot_bitmap(&tasks), @"[2,]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // enqueued, succeeded, or processing tasks started after the second part of the test, should - // again only return the last task - snapshot!(snapshot_bitmap(&tasks), 
@"[2,]"); - - handle.advance_till([ProcessBatchFailed, AfterProcessing]); - let rtxn = index_scheduler.read_txn().unwrap(); - - // now the last task should have failed - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end"); - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // so running the last query should return nothing - snapshot!(snapshot_bitmap(&tasks), @"[]"); - - let query = Query { - statuses: Some(vec![Status::Failed]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // but the same query on failed tasks should return the last task - snapshot!(snapshot_bitmap(&tasks), @"[2,]"); - - let query = Query { - statuses: Some(vec![Status::Failed]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // but the same query on failed tasks should return the last task - snapshot!(snapshot_bitmap(&tasks), @"[2,]"); - - let query = Query { - statuses: Some(vec![Status::Failed]), - uids: Some(vec![1]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // same query but with an invalid uid - snapshot!(snapshot_bitmap(&tasks), @"[]"); - - let query = Query { - statuses: Some(vec![Status::Failed]), - uids: Some(vec![2]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + 
Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // same query but with a valid uid - snapshot!(snapshot_bitmap(&tasks), @"[2,]"); - } - - #[test] - fn query_tasks_special_rules() { - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], - }; - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }], - }; - let _task = index_scheduler.register(kind, None, false).unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); - - handle.advance_till([Start, BatchCreated]); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - - let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // only the first task associated with catto is returned, the indexSwap tasks are excluded! 
- snapshot!(snapshot_bitmap(&tasks), @"[0,]"); - - let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes( - &rtxn, - &query, - &AuthFilter::with_allowed_indexes( - vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), - ), - ) - .unwrap(); - // we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks - // associated with doggo -> empty result - snapshot!(snapshot_bitmap(&tasks), @"[]"); - - let query = Query::default(); - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes( - &rtxn, - &query, - &AuthFilter::with_allowed_indexes( - vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), - ), - ) - .unwrap(); - // we asked for all the tasks, but we are only authorized to retrieve the doggo tasks - // -> only the index creation of doggo should be returned - snapshot!(snapshot_bitmap(&tasks), @"[1,]"); - - let query = Query::default(); - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes( - &rtxn, - &query, - &AuthFilter::with_allowed_indexes( - vec![ - IndexUidPattern::new_unchecked("catto"), - IndexUidPattern::new_unchecked("doggo"), - ] - .into_iter() - .collect(), - ), - ) - .unwrap(); - // we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks - // -> all tasks except the swap of catto with whalo are returned - snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); - - let query = Query::default(); - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // we asked for all the tasks with all index authorized -> all tasks returned - snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,3,]"); - } - - #[test] - fn query_tasks_canceled_by() { - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(3, 
FailureLocation::InsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - let _ = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("doggo", "sheep"); - let _ = index_scheduler.register(kind, None, false).unwrap(); - let kind = KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], - }; - let _task = index_scheduler.register(kind, None, false).unwrap(); - - handle.advance_n_successful_batches(1); - let kind = KindWithContent::TaskCancelation { - query: "test_query".to_string(), - tasks: [0, 1, 2, 3].into_iter().collect(), - }; - let task_cancelation = index_scheduler.register(kind, None, false).unwrap(); - handle.advance_n_successful_batches(1); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); - - let rtxn = index_scheduler.read_txn().unwrap(); - let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // 0 is not returned because it was not canceled, 3 is not returned because it is the uid of the - // taskCancelation itself - snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); - - let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes( - &rtxn, - &query, - &AuthFilter::with_allowed_indexes( - vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), - ), - ) - .unwrap(); - // Return only 1 because the user is not authorized to see task 2 - snapshot!(snapshot_bitmap(&tasks), @"[1,]"); - } - - #[test] - fn query_batches_from_and_limit() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let kind = index_creation_task("doggo", "bone"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - 
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - let kind = index_creation_task("whalo", "plankton"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - let kind = index_creation_task("catto", "his_own_vomit"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); - - handle.advance_n_successful_batches(3); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks"); - - let proc = index_scheduler.processing_tasks.read().unwrap().clone(); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let query = Query { limit: Some(0), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[]"); - - let query = Query { limit: Some(1), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[2,]"); - - let query = Query { limit: Some(2), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[1,2,]"); - - let query = Query { from: Some(1), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); - - let query = Query { from: Some(2), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - 
snapshot!(snapshot_bitmap(&batches), @"[0,1,2,]"); - - let query = Query { from: Some(1), limit: Some(1), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[1,]"); - - let query = Query { from: Some(1), limit: Some(2), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); - } - - #[test] - fn query_batches_simple() { - let start_time = OffsetDateTime::now_utc(); - - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); - - handle.advance_till([Start, BatchCreated]); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap().clone(); - - let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() }; - let (mut batches, _) = index_scheduler - .get_batches_from_authorized_indexes(query.clone(), &AuthFilter::default()) - .unwrap(); - assert_eq!(batches.len(), 1); - batches[0].started_at = OffsetDateTime::UNIX_EPOCH; - // Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689 - let batch = serde_json::to_string_pretty(&batches[0]).unwrap(); - snapshot!(batch, @r#" - { - "uid": 0, - "details": { - "primaryKey": "mouse" - }, - 
"stats": { - "totalNbTasks": 1, - "status": { - "processing": 1 - }, - "types": { - "indexCreation": 1 - }, - "indexUids": { - "catto": 1 - } - }, - "startedAt": "1970-01-01T00:00:00Z", - "finishedAt": null - } - "#); - - let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[]"); // The batches don't contains any enqueued tasks - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Processing]), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[0,]"); // both enqueued and processing tasks in the first tick - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Processing]), - after_started_at: Some(start_time), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // both enqueued and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the test, which should excludes the enqueued tasks - snapshot!(snapshot_bitmap(&batches), @"[0,]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Processing]), - before_started_at: Some(start_time), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // both enqueued and processing tasks in the first tick, but limited to those with a started_at - // that comes before the start of the test, which should excludes all of them - snapshot!(snapshot_bitmap(&batches), @"[]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, 
Status::Processing]), - after_started_at: Some(start_time), - before_started_at: Some(start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // both enqueued and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the test and before one minute after the start of the test, - // which should exclude the enqueued tasks and include the only processing task - snapshot!(snapshot_bitmap(&batches), @"[0,]"); - - handle.advance_till([ - InsideProcessBatch, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - Start, - BatchCreated, - ]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-advancing-a-bit"); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap().clone(); - - let second_start_time = OffsetDateTime::now_utc(); - - let query = Query { - statuses: Some(vec![Status::Succeeded, Status::Processing]), - after_started_at: Some(start_time), - before_started_at: Some(start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // both succeeded and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the test and before one minute after the start of the test, - // which should include all tasks - snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); - - let query = Query { - statuses: Some(vec![Status::Succeeded, Status::Processing]), - before_started_at: Some(start_time), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // both succeeded and processing tasks in the 
first tick, but limited to those with a started_at - // that comes before the start of the test, which should exclude all tasks - snapshot!(snapshot_bitmap(&batches), @"[]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // both succeeded and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the second part of the test and before one minute after the - // second start of the test, which should exclude all tasks - snapshot!(snapshot_bitmap(&batches), @"[]"); - - // now we make one more batch, the started_at field of the new tasks will be past `second_start_time` - handle.advance_till([ - InsideProcessBatch, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - Start, - BatchCreated, - ]); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap().clone(); - - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // we run the same query to verify that, and indeed find that the last task is matched - snapshot!(snapshot_bitmap(&batches), @"[2,]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // enqueued, succeeded, or processing tasks started after the second part of the test, should - // 
again only return the last task - snapshot!(snapshot_bitmap(&batches), @"[2,]"); - - handle.advance_till([ProcessBatchFailed, AfterProcessing]); - let rtxn = index_scheduler.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap().clone(); - - // now the last task should have failed - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end"); - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // so running the last query should return nothing - snapshot!(snapshot_bitmap(&batches), @"[]"); - - let query = Query { - statuses: Some(vec![Status::Failed]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // but the same query on failed tasks should return the last task - snapshot!(snapshot_bitmap(&batches), @"[2,]"); - - let query = Query { - statuses: Some(vec![Status::Failed]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // but the same query on failed tasks should return the last task - snapshot!(snapshot_bitmap(&batches), @"[2,]"); - - let query = Query { - statuses: Some(vec![Status::Failed]), - uids: Some(vec![1]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // same query but with an invalid uid - snapshot!(snapshot_bitmap(&batches), @"[]"); - - let 
query = Query { - statuses: Some(vec![Status::Failed]), - uids: Some(vec![2]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // same query but with a valid uid - snapshot!(snapshot_bitmap(&batches), @"[2,]"); - } - - #[test] - fn query_batches_special_rules() { - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], - }; - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }], - }; - let _task = index_scheduler.register(kind, None, false).unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); - - handle.advance_till([Start, BatchCreated]); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap().clone(); - - let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // only the first task associated with catto is returned, the indexSwap tasks are excluded! 
- snapshot!(snapshot_bitmap(&batches), @"[0,]"); - - let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes( - &rtxn, - &proc, - &query, - &AuthFilter::with_allowed_indexes( - vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), - ), - ) - .unwrap(); - // we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks - // associated with doggo -> empty result - snapshot!(snapshot_bitmap(&batches), @"[]"); - - drop(rtxn); - // We're going to advance and process all the batches for the next query to actually hit the db - handle.advance_till([ - InsideProcessBatch, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - ]); - handle.advance_one_successful_batch(); - handle.advance_n_failed_batches(2); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-processing-everything"); - let rtxn = index_scheduler.env.read_txn().unwrap(); - - let query = Query::default(); - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes( - &rtxn, - &proc, - &query, - &AuthFilter::with_allowed_indexes( - vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), - ), - ) - .unwrap(); - // we asked for all the tasks, but we are only authorized to retrieve the doggo tasks - // -> only the index creation of doggo should be returned - snapshot!(snapshot_bitmap(&batches), @"[1,]"); - - let query = Query::default(); - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes( - &rtxn, - &proc, - &query, - &AuthFilter::with_allowed_indexes( - vec![ - IndexUidPattern::new_unchecked("catto"), - IndexUidPattern::new_unchecked("doggo"), - ] - .into_iter() - .collect(), - ), - ) - .unwrap(); - // we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks - // -> all tasks except the swap of catto with whalo are returned - 
snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); - - let query = Query::default(); - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // we asked for all the tasks with all index authorized -> all tasks returned - snapshot!(snapshot_bitmap(&batches), @"[0,1,2,3,]"); - } - - #[test] - fn query_batches_canceled_by() { - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - let _ = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("doggo", "sheep"); - let _ = index_scheduler.register(kind, None, false).unwrap(); - let kind = KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], - }; - let _task = index_scheduler.register(kind, None, false).unwrap(); - - handle.advance_n_successful_batches(1); - let kind = KindWithContent::TaskCancelation { - query: "test_query".to_string(), - tasks: [0, 1, 2, 3].into_iter().collect(), - }; - let task_cancelation = index_scheduler.register(kind, None, false).unwrap(); - handle.advance_n_successful_batches(1); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); - - let rtxn = index_scheduler.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap().clone(); - let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // The batch zero was the index creation task, the 1 is the task cancellation - snapshot!(snapshot_bitmap(&batches), @"[1,]"); - - let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes( - &rtxn, - &proc, - 
&query, - &AuthFilter::with_allowed_indexes( - vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), - ), - ) - .unwrap(); - // Return only 1 because the user is not authorized to see task 2 - snapshot!(snapshot_bitmap(&batches), @"[1,]"); - } - - #[test] - fn fail_in_process_batch_for_index_creation() { - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(1, FailureLocation::InsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - - let _task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); - - handle.advance_one_failed_batch(); - - // Still in the first iteration - assert_eq!(*index_scheduler.run_loop_iteration.read().unwrap(), 1); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed"); - } - - #[test] - fn fail_in_process_batch_for_document_addition() { - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(1, FailureLocation::InsideProcessBatch)]); - - let content = r#" - { - "id": 1, - "doggo": "bob" - }"#; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - handle.advance_till([Start, BatchCreated]); - - snapshot!( - snapshot_index_scheduler(&index_scheduler), - name: "document_addition_batch_created" - ); - - handle.advance_till([ProcessBatchFailed, AfterProcessing]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "document_addition_failed"); - } - - #[test] - 
fn fail_in_update_task_after_process_batch_success_for_document_addition() { - let (index_scheduler, mut handle) = IndexScheduler::test( - true, - vec![(1, FailureLocation::UpdatingTaskAfterProcessBatchSuccess { task_uid: 0 })], - ); - - let content = r#" - { - "id": 1, - "doggo": "bob" - }"#; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - handle.advance_till([Start]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "document_addition_succeeded_but_index_scheduler_not_updated"); - - handle.advance_till([BatchCreated, InsideProcessBatch, ProcessBatchSucceeded]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_succeeded"); - - // At this point the next time the scheduler will try to progress it should encounter - // a critical failure and have to wait for 1s before retrying anything. 
- - let before_failure = Instant::now(); - handle.advance_till([Start]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_to_commit"); - let failure_duration = before_failure.elapsed(); - assert!(failure_duration.as_millis() >= 1000); - - handle.advance_till([ - BatchCreated, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - ]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_successfully_processed"); - } - - #[test] - fn test_document_addition_cant_create_index_without_index() { - // We're going to autobatch multiple document addition that don't have - // the right to create an index while there is no index currently. - // Thus, everything should be batched together and a IndexDoesNotExists - // error should be throwed. - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // Everything should be batched together. - handle.advance_till([ - Start, - BatchCreated, - InsideProcessBatch, - ProcessBatchFailed, - AfterProcessing, - ]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_10_tasks"); - - // The index should not exist. 
- snapshot!(matches!(index_scheduler.index_exists("doggos"), Ok(true)), @"false"); - } - - #[test] - fn test_document_addition_cant_create_index_without_index_without_autobatching() { - // We're going to execute multiple document addition that don't have - // the right to create an index while there is no index currently. - // Since the auto-batching is disabled, every task should be processed - // sequentially and throw an IndexDoesNotExists. - let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // Nothing should be batched thus half of the tasks are processed. - handle.advance_n_failed_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); - - // Everything is processed. - handle.advance_n_failed_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - - // The index should not exist. - snapshot!(matches!(index_scheduler.index_exists("doggos"), Ok(true)), @"false"); - } - - #[test] - fn test_document_addition_cant_create_index_with_index() { - // We're going to autobatch multiple document addition that don't have - // the right to create an index while there is already an index. 
- // Thus, everything should be batched together and no error should be - // throwed. - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - // Create the index. - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // Everything should be batched together. - handle.advance_n_successful_batches(1); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_10_tasks"); - - // Has everything being pushed successfully in milli? 
- let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_cant_create_index_with_index_without_autobatching() { - // We're going to execute multiple document addition that don't have - // the right to create an index while there is no index currently. - // Since the autobatching is disabled, every tasks should be processed - // sequentially and throw an IndexDoesNotExists. - let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); - - // Create the index. - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - 
// Nothing should be batched thus half of the tasks are processed. - handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); - - // Everything is processed. - handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - - // Has everything being pushed successfully in milli? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_mixed_rights_with_index() { - // We're going to autobatch multiple document addition. - // - The index already exists - // - The first document addition don't have the right to create an index - // can it batch with the other one? - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - // Create the index. 
- index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - let allow_index_creation = i % 2 != 0; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // Everything should be batched together. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - - // Has everything being pushed successfully in milli? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_mixed_right_without_index_starts_with_cant_create() { - // We're going to autobatch multiple document addition. 
- // - The index does not exists - // - The first document addition don't have the right to create an index - // - The second do. They should not batch together. - // - The second should batch with everything else as it's going to create an index. - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - let allow_index_creation = i % 2 != 0; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // A first batch should be processed with only the first documentAddition that's going to fail. - handle.advance_one_failed_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "only_first_task_failed"); - - // Everything else should be batched together. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - - // Has everything being pushed successfully in milli? 
- let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_with_multiple_primary_key() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for (id, primary_key) in ["id", "bork", "bloup"].iter().enumerate() { - let content = format!( - r#"{{ - "id": {id}, - "doggo": "jean bob" - }}"#, - ); - let (uuid, mut file) = - index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - assert_eq!(documents_count, 1); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S(primary_key)), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_3_tasks"); - - // A first batch should be processed with only the first documentAddition. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "only_first_task_succeed"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_task_fails"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_fails"); - - // Is the primary key still what we expect? 
- let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); - snapshot!(primary_key, @"id"); - - // Is the document still the one we expect?. - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_with_multiple_primary_key_batch_wrong_key() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for (id, primary_key) in ["id", "bork", "bork"].iter().enumerate() { - let content = format!( - r#"{{ - "id": {id}, - "doggo": "jean bob" - }}"#, - ); - let (uuid, mut file) = - index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - assert_eq!(documents_count, 1); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S(primary_key)), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_3_tasks"); - - // A first batch should be processed with only the first documentAddition. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "only_first_task_succeed"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_and_third_tasks_fails"); - - // Is the primary key still what we expect? 
- let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); - snapshot!(primary_key, @"id"); - - // Is the document still the one we expect?. - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_with_bad_primary_key() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for (id, primary_key) in ["bork", "bork", "id", "bork", "id"].iter().enumerate() { - let content = format!( - r#"{{ - "id": {id}, - "doggo": "jean bob" - }}"#, - ); - let (uuid, mut file) = - index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - assert_eq!(documents_count, 1); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S(primary_key)), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_5_tasks"); - - // A first batch should be processed with only the first two documentAddition. - // it should fails because the documents don't contains any `bork` field. - // NOTE: it's marked as successful because the batch didn't fails, it's the individual tasks that failed. 
- handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_and_second_task_fails"); - - // The primary key should be set to none since we failed the batch. - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap(); - snapshot!(primary_key.is_none(), @"true"); - - // The second batch should succeed and only contains one task. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_succeeds"); - - // The primary key should be set to `id` since this batch succeeded. - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); - snapshot!(primary_key, @"id"); - - // We're trying to `bork` again, but now there is already a primary key set for this index. - // NOTE: it's marked as successful because the batch didn't fails, it's the individual tasks that failed. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "fourth_task_fails"); - - // Finally the last task should succeed since its primary key is the same as the valid one. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "fifth_task_succeeds"); - - // Is the primary key still what we expect? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); - snapshot!(primary_key, @"id"); - - // Is the document still the one we expect?. 
- let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_with_set_and_null_primary_key() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for (id, primary_key) in - [None, Some("bork"), Some("paw"), None, None, Some("paw")].into_iter().enumerate() - { - let content = format!( - r#"{{ - "paw": {id}, - "doggo": "jean bob" - }}"#, - ); - let (uuid, mut file) = - index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - assert_eq!(documents_count, 1); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: primary_key.map(|pk| pk.to_string()), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_6_tasks"); - - // A first batch should contains only one task that fails because we can't infer the primary key. - // NOTE: it's marked as successful because the batch didn't fails, it's the individual tasks that failed. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_task_fails"); - - // The second batch should contains only one task that fails because we bork is not a valid primary key. - // NOTE: it's marked as successful because the batch didn't fails, it's the individual tasks that failed. 
- handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_task_fails"); - - // No primary key should be set at this point. - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap(); - snapshot!(primary_key.is_none(), @"true"); - - // The third batch should succeed and only contains one task. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_succeeds"); - - // The primary key should be set to `id` since this batch succeeded. - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); - snapshot!(primary_key, @"paw"); - - // We should be able to batch together the next two tasks that don't specify any primary key - // + the last task that matches the current primary-key. Everything should succeed. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_other_tasks_succeeds"); - - // Is the primary key still what we expect? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); - snapshot!(primary_key, @"paw"); - - // Is the document still the one we expect?. 
- let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_with_set_and_null_primary_key_inference_works() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for (id, primary_key) in [None, Some("bork"), Some("doggoid"), None, None, Some("doggoid")] - .into_iter() - .enumerate() - { - let content = format!( - r#"{{ - "doggoid": {id}, - "doggo": "jean bob" - }}"#, - ); - let (uuid, mut file) = - index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - assert_eq!(documents_count, 1); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: primary_key.map(|pk| pk.to_string()), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_6_tasks"); - - // A first batch should contains only one task that succeed and sets the primary key to `doggoid`. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_task_succeed"); - - // Checking the primary key. - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap(); - snapshot!(primary_key.is_none(), @"false"); - - // The second batch should contains only one task that fails because it tries to update the primary key to `bork`. 
- handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_task_fails"); - - // The third batch should succeed and only contains one task. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_succeeds"); - - // We should be able to batch together the next two tasks that don't specify any primary key - // + the last task that matches the current primary-key. Everything should succeed. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_other_tasks_succeeds"); - - // Is the primary key still what we expect? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); - snapshot!(primary_key, @"doggoid"); - - // Is the document still the one we expect?. - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn panic_in_process_batch_for_index_creation() { - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(1, FailureLocation::PanicInsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - - let _task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - handle.advance_till([Start, BatchCreated, ProcessBatchFailed, AfterProcessing]); - - // Still in the first iteration - assert_eq!(*index_scheduler.run_loop_iteration.read().unwrap(), 1); - // No matter what happens in process_batch, the index_scheduler should be internally consistent - 
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed"); - } - - #[test] - fn test_task_queue_is_full() { - let (index_scheduler, mut handle) = - IndexScheduler::test_with_custom_config(vec![], |config| { - // that's the minimum map size possible - config.task_db_size = 1048576; - }); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - // on average this task takes ~600 bytes - loop { - let result = index_scheduler.register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ); - if result.is_err() { - break; - } - handle.advance_one_failed_batch(); - } - index_scheduler.assert_internally_consistent(); - - // at this point the task DB shoud have reached its limit and we should not be able to register new tasks - let result = index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap_err(); - snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations."); - // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code - snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice"); - - // Even the task deletion that doesn't delete anything shouldn't be accepted - let result = index_scheduler - .register( - KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() }, - None, - false, - ) - .unwrap_err(); - snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. 
Please delete tasks to continue performing write operations."); - // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code - snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice"); - - // But a task deletion that delete something should works - index_scheduler - .register( - KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - // Now we should be able to enqueue a few tasks again - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - handle.advance_one_failed_batch(); - } - - #[test] - fn test_auto_deletion_of_tasks() { - let (index_scheduler, mut handle) = - IndexScheduler::test_with_custom_config(vec![], |config| { - config.max_number_of_tasks = 2; - }); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - handle.advance_one_failed_batch(); - - // at this point the max number of tasks is reached - // we can still enqueue multiple tasks - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); - let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => 
"[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); - drop(rtxn); - - // now we're above the max number of tasks - // and if we try to advance in the tick function a new task deletion should be enqueued - handle.advance_till([Start, BatchCreated]); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); - let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued"); - drop(rtxn); - - handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); - let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed"); - drop(rtxn); - - handle.advance_one_failed_batch(); - // a new task deletion has been enqueued - handle.advance_one_successful_batch(); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); - let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion"); - drop(rtxn); - - handle.advance_one_failed_batch(); - handle.advance_one_successful_batch(); - let rtxn = 
index_scheduler.env.read_txn().unwrap(); - let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); - let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed"); - drop(rtxn); - } - - #[test] - fn test_disable_auto_deletion_of_tasks() { - let (index_scheduler, mut handle) = - IndexScheduler::test_with_custom_config(vec![], |config| { - config.cleanup_enabled = false; - config.max_number_of_tasks = 2; - }); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - handle.advance_one_failed_batch(); - - // at this point the max number of tasks is reached - // we can still enqueue multiple tasks - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); - let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); - drop(rtxn); - - // now we're above the max number of tasks - // and if we try to advance in the tick function no new task deletion should be enqueued - handle.advance_till([Start, 
BatchCreated]); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); - let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued"); - drop(rtxn); - } - - #[test] - fn basic_get_stats() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - - snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" - { - "indexes": { - "catto": 1, - "doggo": 1, - "whalo": 1 - }, - "statuses": { - "canceled": 0, - "enqueued": 3, - "failed": 0, - "processing": 0, - "succeeded": 0 - }, - "types": { - "documentAdditionOrUpdate": 0, - "documentDeletion": 0, - "documentEdition": 0, - "dumpCreation": 0, - "indexCreation": 3, - "indexDeletion": 0, - "indexSwap": 0, - "indexUpdate": 0, - "settingsUpdate": 0, - "snapshotCreation": 0, - "taskCancelation": 0, - "taskDeletion": 0 - } - } - "###); - - handle.advance_till([Start, BatchCreated]); - snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" - { - "indexes": { - "catto": 1, - "doggo": 1, - "whalo": 1 - }, - "statuses": { - "canceled": 0, - "enqueued": 2, - "failed": 0, - "processing": 1, - "succeeded": 0 - }, - "types": { - "documentAdditionOrUpdate": 0, - "documentDeletion": 0, - "documentEdition": 0, - "dumpCreation": 0, - "indexCreation": 3, - "indexDeletion": 0, - "indexSwap": 0, - "indexUpdate": 0, - 
"settingsUpdate": 0, - "snapshotCreation": 0, - "taskCancelation": 0, - "taskDeletion": 0 - } - } - "###); - - handle.advance_till([ - InsideProcessBatch, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - Start, - BatchCreated, - ]); - snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" - { - "indexes": { - "catto": 1, - "doggo": 1, - "whalo": 1 - }, - "statuses": { - "canceled": 0, - "enqueued": 1, - "failed": 0, - "processing": 1, - "succeeded": 1 - }, - "types": { - "documentAdditionOrUpdate": 0, - "documentDeletion": 0, - "documentEdition": 0, - "dumpCreation": 0, - "indexCreation": 3, - "indexDeletion": 0, - "indexSwap": 0, - "indexUpdate": 0, - "settingsUpdate": 0, - "snapshotCreation": 0, - "taskCancelation": 0, - "taskDeletion": 0 - } - } - "###); - - // now we make one more batch, the started_at field of the new tasks will be past `second_start_time` - handle.advance_till([ - InsideProcessBatch, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - Start, - BatchCreated, - ]); - snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" - { - "indexes": { - "catto": 1, - "doggo": 1, - "whalo": 1 - }, - "statuses": { - "canceled": 0, - "enqueued": 0, - "failed": 0, - "processing": 1, - "succeeded": 2 - }, - "types": { - "documentAdditionOrUpdate": 0, - "documentDeletion": 0, - "documentEdition": 0, - "dumpCreation": 0, - "indexCreation": 3, - "indexDeletion": 0, - "indexSwap": 0, - "indexUpdate": 0, - "settingsUpdate": 0, - "snapshotCreation": 0, - "taskCancelation": 0, - "taskDeletion": 0 - } - } - "###); - } - - #[test] - fn cancel_processing_dump() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None }; - let dump_cancellation = KindWithContent::TaskCancelation { - query: "cancel dump".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }; - let _ = 
index_scheduler.register(dump_creation, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register"); - handle.advance_till([Start, BatchCreated, InsideProcessBatch]); - - let _ = index_scheduler.register(dump_cancellation, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered"); - - snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); - } - - #[test] - fn basic_set_taskid() { - let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); - - let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(task.uid, @"0"); - - let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let task = index_scheduler.register(kind, Some(12), false).unwrap(); - snapshot!(task.uid, @"12"); - - let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let error = index_scheduler.register(kind, Some(5), false).unwrap_err(); - snapshot!(error, @"Received bad task id: 5 should be >= to 13."); - } - - #[test] - fn dry_run() { - let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); - - let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let task = index_scheduler.register(kind, None, true).unwrap(); - snapshot!(task.uid, @"0"); - snapshot!(snapshot_index_scheduler(&index_scheduler), @r" - ### Autobatching Enabled = true - ### Processing batch None: - [] - ---------------------------------------------------------------------- - ### All Tasks: - ---------------------------------------------------------------------- - ### Status: - ---------------------------------------------------------------------- - ### Kind: - 
---------------------------------------------------------------------- - ### Index Tasks: - ---------------------------------------------------------------------- - ### Index Mapper: - - ---------------------------------------------------------------------- - ### Canceled By: - - ---------------------------------------------------------------------- - ### Enqueued At: - ---------------------------------------------------------------------- - ### Started At: - ---------------------------------------------------------------------- - ### Finished At: - ---------------------------------------------------------------------- - ### All Batches: - ---------------------------------------------------------------------- - ### Batch to tasks mapping: - ---------------------------------------------------------------------- - ### Batches Status: - ---------------------------------------------------------------------- - ### Batches Kind: - ---------------------------------------------------------------------- - ### Batches Index Tasks: - ---------------------------------------------------------------------- - ### Batches Enqueued At: - ---------------------------------------------------------------------- - ### Batches Started At: - ---------------------------------------------------------------------- - ### Batches Finished At: - ---------------------------------------------------------------------- - ### File Store: - - ---------------------------------------------------------------------- - "); - - let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let task = index_scheduler.register(kind, Some(12), true).unwrap(); - snapshot!(task.uid, @"12"); - snapshot!(snapshot_index_scheduler(&index_scheduler), @r" - ### Autobatching Enabled = true - ### Processing batch None: - [] - ---------------------------------------------------------------------- - ### All Tasks: - ---------------------------------------------------------------------- - ### 
Status: - ---------------------------------------------------------------------- - ### Kind: - ---------------------------------------------------------------------- - ### Index Tasks: - ---------------------------------------------------------------------- - ### Index Mapper: - - ---------------------------------------------------------------------- - ### Canceled By: - - ---------------------------------------------------------------------- - ### Enqueued At: - ---------------------------------------------------------------------- - ### Started At: - ---------------------------------------------------------------------- - ### Finished At: - ---------------------------------------------------------------------- - ### All Batches: - ---------------------------------------------------------------------- - ### Batch to tasks mapping: - ---------------------------------------------------------------------- - ### Batches Status: - ---------------------------------------------------------------------- - ### Batches Kind: - ---------------------------------------------------------------------- - ### Batches Index Tasks: - ---------------------------------------------------------------------- - ### Batches Enqueued At: - ---------------------------------------------------------------------- - ### Batches Started At: - ---------------------------------------------------------------------- - ### Batches Finished At: - ---------------------------------------------------------------------- - ### File Store: - - ---------------------------------------------------------------------- - "); - } - - #[test] - fn import_vectors() { - use meilisearch_types::settings::{Settings, Unchecked}; - use milli::update::Setting; - - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let mut new_settings: Box> = Box::default(); - let mut embedders = BTreeMap::default(); - let embedding_settings = milli::vector::settings::EmbeddingSettings { - source: 
Setting::Set(milli::vector::settings::EmbedderSource::Rest), - api_key: Setting::Set(S("My super secret")), - url: Setting::Set(S("http://localhost:7777")), - dimensions: Setting::Set(384), - request: Setting::Set(serde_json::json!("{{text}}")), - response: Setting::Set(serde_json::json!("{{embedding}}")), - ..Default::default() - }; - embedders.insert(S("A_fakerest"), Setting::Set(embedding_settings)); - - let embedding_settings = milli::vector::settings::EmbeddingSettings { - source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace), - model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")), - revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")), - document_template: Setting::Set(S("{{doc.doggo}} the {{doc.breed}} best doggo")), - ..Default::default() - }; - embedders.insert(S("B_small_hf"), Setting::Set(embedding_settings)); - - new_settings.embedders = Setting::Set(embedders); - - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings, - is_deletion: false, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors"); - - { - let rtxn = index_scheduler.read_txn().unwrap(); - let task = index_scheduler.get_task(&rtxn, 0).unwrap().unwrap(); - let task = meilisearch_types::task_view::TaskView::from_task(&task); - insta::assert_json_snapshot!(task.details); - } - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors"); - - { - let rtxn = index_scheduler.read_txn().unwrap(); - let task = index_scheduler.get_task(&rtxn, 0).unwrap().unwrap(); - let task = meilisearch_types::task_view::TaskView::from_task(&task); - insta::assert_json_snapshot!(task.details); - } - - let (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) = { - let index = 
index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - - let configs = index.embedding_configs(&rtxn).unwrap(); - // for consistency with the below - #[allow(clippy::get_first)] - let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } = - configs.get(0).unwrap(); - insta::assert_snapshot!(name, @"A_fakerest"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); - insta::assert_json_snapshot!(fakerest_config.embedder_options); - let fakerest_name = name.clone(); - - let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } = - configs.get(1).unwrap(); - insta::assert_snapshot!(name, @"B_small_hf"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); - insta::assert_json_snapshot!(simple_hf_config.embedder_options); - let simple_hf_name = name.clone(); - - let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap(); - let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); - let beagle_embed = - hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap(); - let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo"), None).unwrap(); - let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo"), None).unwrap(); - (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) - }; - - // add one doc, specifying vectors - - let doc = serde_json::json!( - { - "id": 0, - "doggo": "Intel", - "breed": "beagle", - "_vectors": { - &fakerest_name: { - // this will never trigger regeneration, which is good because we can't actually generate with - // this embedder - "regenerate": false, - "embeddings": beagle_embed, - }, - &simple_hf_name: { - // this will be regenerated on updates - "regenerate": true, - "embeddings": lab_embed, - }, - "noise": [0.1, 0.2, 0.3] - } - } - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0u128).unwrap(); - let documents_count = 
read_json(doc.to_string().as_bytes(), &mut file).unwrap(); - assert_eq!(documents_count, 1); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel"); - - handle.advance_one_successful_batch(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "adding Intel succeeds"); - - // check embeddings - { - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - - // Ensure the document have been inserted into the relevant bitamp - let configs = index.embedding_configs(&rtxn).unwrap(); - // for consistency with the below - #[allow(clippy::get_first)] - let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } = - configs.get(0).unwrap(); - insta::assert_snapshot!(name, @"A_fakerest"); - insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>"); - - let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap(); - insta::assert_snapshot!(name, @"B_small_hf"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); - - let embeddings = index.embeddings(&rtxn, 0).unwrap(); - - assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true"); - assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true"); - - let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1; - let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let doc = obkv_to_json( - &[ - fields_ids_map.id("doggo").unwrap(), - fields_ids_map.id("breed").unwrap(), - fields_ids_map.id("_vectors").unwrap(), - ], - &fields_ids_map, - doc, - ) - .unwrap(); - assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"}); - } 
- - // update the doc, specifying vectors - - let doc = serde_json::json!( - { - "id": 0, - "doggo": "kefir", - "breed": "patou", - } - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(1u128).unwrap(); - let documents_count = read_json(doc.to_string().as_bytes(), &mut file).unwrap(); - assert_eq!(documents_count, 1); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: None, - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir succeeds"); - - { - // check embeddings - { - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - - // Ensure the document have been inserted into the relevant bitamp - let configs = index.embedding_configs(&rtxn).unwrap(); - // for consistency with the below - #[allow(clippy::get_first)] - let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } = - configs.get(0).unwrap(); - insta::assert_snapshot!(name, @"A_fakerest"); - insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>"); - - let IndexEmbeddingConfig { name, config: _, user_provided } = - configs.get(1).unwrap(); - insta::assert_snapshot!(name, @"B_small_hf"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); - - let embeddings = index.embeddings(&rtxn, 0).unwrap(); - - // automatically changed to patou because set to regenerate - assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true"); - // remained beagle - assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true"); - - let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1; - let 
fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let doc = obkv_to_json( - &[ - fields_ids_map.id("doggo").unwrap(), - fields_ids_map.id("breed").unwrap(), - fields_ids_map.id("_vectors").unwrap(), - ], - &fields_ids_map, - doc, - ) - .unwrap(); - assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"}); - } - } - } - - #[test] - fn import_vectors_first_and_embedder_later() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let content = serde_json::json!( - [ - { - "id": 0, - "doggo": "kefir", - }, - { - "id": 1, - "doggo": "intel", - "_vectors": { - "my_doggo_embedder": vec![1; 384], - "unknown embedder": vec![1, 2, 3], - } - }, - { - "id": 2, - "doggo": "max", - "_vectors": { - "my_doggo_embedder": { - "regenerate": false, - "embeddings": vec![2; 384], - }, - "unknown embedder": vec![4, 5], - }, - }, - { - "id": 3, - "doggo": "marcel", - "_vectors": { - "my_doggo_embedder": { - "regenerate": true, - "embeddings": vec![3; 384], - }, - }, - }, - { - "id": 4, - "doggo": "sora", - "_vectors": { - "my_doggo_embedder": { - "regenerate": true, - }, - }, - }, - ] - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap(); - let documents_count = - read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file) - .unwrap(); - snapshot!(documents_count, @"5"); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: None, - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| 
obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string(&documents).unwrap(), name: "documents after initial push"); - - let setting = meilisearch_types::settings::Settings:: { - embedders: Setting::Set(maplit::btreemap! { - S("my_doggo_embedder") => Setting::Set(EmbeddingSettings { - source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace), - model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")), - revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")), - document_template: Setting::Set(S("{{doc.doggo}}")), - ..Default::default() - }) - }), - ..Default::default() - }; - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings: Box::new(setting), - is_deletion: false, - allow_index_creation: false, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - handle.advance_one_successful_batch(); - index_scheduler.assert_internally_consistent(); - - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - // the all the vectors linked to the new specified embedder have been removed - // Only the unknown embedders stays in the document DB - snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###); - let conf = index.embedding_configs(&rtxn).unwrap(); - // even though we specified the vector for the ID 3, it shouldn't be marked - // as user provided since we 
explicitely marked it as NOT user provided. - snapshot!(format!("{conf:#?}"), @r###" - [ - IndexEmbeddingConfig { - name: "my_doggo_embedder", - config: EmbeddingConfig { - embedder_options: HuggingFace( - EmbedderOptions { - model: "sentence-transformers/all-MiniLM-L6-v2", - revision: Some( - "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", - ), - distribution: None, - }, - ), - prompt: PromptData { - template: "{{doc.doggo}}", - max_bytes: Some( - 400, - ), - }, - quantized: None, - }, - user_provided: RoaringBitmap<[1, 2]>, - }, - ] - "###); - let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); - let embeddings = index.embeddings(&rtxn, docid).unwrap(); - let embedding = &embeddings["my_doggo_embedder"]; - assert!(!embedding.is_empty(), "{embedding:?}"); - - // the document with the id 3 should keep its original embedding - let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); - let embeddings = index.embeddings(&rtxn, docid).unwrap(); - let embeddings = &embeddings["my_doggo_embedder"]; - - snapshot!(embeddings.len(), @"1"); - assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]); - - // If we update marcel it should regenerate its embedding automatically - - let content = serde_json::json!( - [ - { - "id": 3, - "doggo": "marvel", - }, - { - "id": 4, - "doggo": "sorry", - }, - ] - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(1_u128).unwrap(); - let documents_count = - read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file) - .unwrap(); - snapshot!(documents_count, @"2"); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: None, - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - // the document with the id 3 should have its 
original embedding updated - let rtxn = index.read_txn().unwrap(); - let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); - let doc = index.documents(&rtxn, Some(docid)).unwrap()[0]; - let doc = obkv_to_json(&field_ids, &field_ids_map, doc.1).unwrap(); - snapshot!(json_string!(doc), @r###" - { - "id": 3, - "doggo": "marvel" - } - "###); - - let embeddings = index.embeddings(&rtxn, docid).unwrap(); - let embedding = &embeddings["my_doggo_embedder"]; - - assert!(!embedding.is_empty()); - assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]); - - // the document with the id 4 should generate an embedding - let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap(); - let embeddings = index.embeddings(&rtxn, docid).unwrap(); - let embedding = &embeddings["my_doggo_embedder"]; - - assert!(!embedding.is_empty()); - } - - #[test] - fn delete_document_containing_vector() { - // 1. Add an embedder - // 2. Push two documents containing a simple vector - // 3. Delete the first document - // 4. The user defined roaring bitmap shouldn't contains the id of the first document anymore - // 5. Clear the index - // 6. The user defined roaring bitmap shouldn't contains the id of the second document - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let setting = meilisearch_types::settings::Settings:: { - embedders: Setting::Set(maplit::btreemap! 
{ - S("manual") => Setting::Set(EmbeddingSettings { - source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided), - dimensions: Setting::Set(3), - ..Default::default() - }) - }), - ..Default::default() - }; - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings: Box::new(setting), - is_deletion: false, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - let content = serde_json::json!( - [ - { - "id": 0, - "doggo": "kefir", - "_vectors": { - "manual": vec![0, 0, 0], - } - }, - { - "id": 1, - "doggo": "intel", - "_vectors": { - "manual": vec![1, 1, 1], - } - }, - ] - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap(); - let documents_count = - read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file) - .unwrap(); - snapshot!(documents_count, @"2"); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: None, - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - index_scheduler - .register( - KindWithContent::DocumentDeletion { - index_uid: S("doggos"), - documents_ids: vec![S("1")], - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###); - let conf = 
index.embedding_configs(&rtxn).unwrap(); - snapshot!(format!("{conf:#?}"), @r###" - [ - IndexEmbeddingConfig { - name: "manual", - config: EmbeddingConfig { - embedder_options: UserProvided( - EmbedderOptions { - dimensions: 3, - distribution: None, - }, - ), - prompt: PromptData { - template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", - max_bytes: Some( - 400, - ), - }, - quantized: None, - }, - user_provided: RoaringBitmap<[0]>, - }, - ] - "###); - let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); - let embeddings = index.embeddings(&rtxn, docid).unwrap(); - let embedding = &embeddings["manual"]; - assert!(!embedding.is_empty(), "{embedding:?}"); - - index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) - .unwrap(); - handle.advance_one_successful_batch(); - - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string(&documents).unwrap(), @"[]"); - let conf = index.embedding_configs(&rtxn).unwrap(); - snapshot!(format!("{conf:#?}"), @r###" - [ - IndexEmbeddingConfig { - name: "manual", - config: EmbeddingConfig { - embedder_options: UserProvided( - EmbedderOptions { - dimensions: 3, - distribution: None, - }, - ), - prompt: PromptData { - template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", - max_bytes: Some( - 400, - ), - }, - quantized: None, - }, - user_provided: RoaringBitmap<[]>, - }, - ] - "###); - } - - #[test] - fn delete_embedder_with_user_provided_vectors() { 
- // 1. Add two embedders - // 2. Push two documents containing a simple vector - // 3. The documents must not contain the vectors after the update as they are in the vectors db - // 3. Delete the embedders - // 4. The documents contain the vectors again - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let setting = meilisearch_types::settings::Settings:: { - embedders: Setting::Set(maplit::btreemap! { - S("manual") => Setting::Set(EmbeddingSettings { - source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided), - dimensions: Setting::Set(3), - ..Default::default() - }), - S("my_doggo_embedder") => Setting::Set(EmbeddingSettings { - source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace), - model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")), - revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")), - document_template: Setting::Set(S("{{doc.doggo}}")), - ..Default::default() - }), - }), - ..Default::default() - }; - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings: Box::new(setting), - is_deletion: false, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - let content = serde_json::json!( - [ - { - "id": 0, - "doggo": "kefir", - "_vectors": { - "manual": vec![0, 0, 0], - "my_doggo_embedder": vec![1; 384], - } - }, - { - "id": 1, - "doggo": "intel", - "_vectors": { - "manual": vec![1, 1, 1], - } - }, - ] - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap(); - let documents_count = - read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file) - .unwrap(); - snapshot!(documents_count, @"2"); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: None, - method: ReplaceDocuments, - content_file: uuid, - 
documents_count, - allow_index_creation: false, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - { - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel"}]"###); - } - - { - let setting = meilisearch_types::settings::Settings:: { - embedders: Setting::Set(maplit::btreemap! { - S("manual") => Setting::Reset, - }), - ..Default::default() - }; - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings: Box::new(setting), - is_deletion: false, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - } - - { - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###); - } - - { - let setting = meilisearch_types::settings::Settings:: { - embedders: Setting::Reset, - ..Default::default() - }; - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings: Box::new(setting), - is_deletion: false, - allow_index_creation: 
true, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - } - - { - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - - // FIXME: redaction - snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"embeddings\":[[0.0,0.0,0.0]],\"regenerate\":false},\"my_doggo_embedder\":{\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]],\"regenerate\":false}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"embeddings\":[[1.0,1.0,1.0]],\"regenerate\":false}}}]""###); - } - } -} diff --git a/crates/index-scheduler/src/processing.rs b/crates/index-scheduler/src/processing.rs index aca654de9..d0382a81b 100644 --- a/crates/index-scheduler/src/processing.rs +++ b/crates/index-scheduler/src/processing.rs @@ -8,7 +8,7 @@ use roaring::RoaringBitmap; use crate::utils::ProcessingBatch; -#[derive(Clone)] +#[derive(Clone, Default)] pub struct ProcessingTasks { pub batch: Option>, /// The list of tasks ids that are currently running. @@ -20,7 +20,7 @@ pub struct ProcessingTasks { impl ProcessingTasks { /// Creates an empty `ProcessingAt` struct. 
pub fn new() -> ProcessingTasks { - ProcessingTasks { batch: None, processing: Arc::new(RoaringBitmap::new()), progress: None } + ProcessingTasks::default() } pub fn get_progress_view(&self) -> Option { diff --git a/crates/index-scheduler/src/queue/batches.rs b/crates/index-scheduler/src/queue/batches.rs new file mode 100644 index 000000000..a31653387 --- /dev/null +++ b/crates/index-scheduler/src/queue/batches.rs @@ -0,0 +1,537 @@ +use std::ops::{Bound, RangeBounds}; + +use meilisearch_types::batches::{Batch, BatchId}; +use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str}; +use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; +use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; +use meilisearch_types::tasks::{Kind, Status}; +use roaring::{MultiOps, RoaringBitmap}; +use time::OffsetDateTime; + +use super::{Query, Queue}; +use crate::processing::ProcessingTasks; +use crate::utils::{insert_task_datetime, keep_ids_within_datetimes, map_bound, ProcessingBatch}; +use crate::{Error, Result, BEI128}; + +/// Database const names for the `IndexScheduler`. +mod db_name { + pub const ALL_BATCHES: &str = "all-batches"; + + pub const BATCH_STATUS: &str = "batch-status"; + pub const BATCH_KIND: &str = "batch-kind"; + pub const BATCH_INDEX_TASKS: &str = "batch-index-tasks"; + pub const BATCH_ENQUEUED_AT: &str = "batch-enqueued-at"; + pub const BATCH_STARTED_AT: &str = "batch-started-at"; + pub const BATCH_FINISHED_AT: &str = "batch-finished-at"; +} + +pub struct BatchQueue { + /// Contains all the batches accessible by their Id. + pub(crate) all_batches: Database>, + + /// All the batches containing a task matching the selected status. + pub(crate) status: Database, RoaringBitmapCodec>, + /// All the batches ids grouped by the kind of their task. + pub(crate) kind: Database, RoaringBitmapCodec>, + /// Store the batches associated to an index. 
+ pub(crate) index_tasks: Database, + /// Store the batches containing tasks which were enqueued at a specific date + pub(crate) enqueued_at: Database, + /// Store the batches containing finished tasks started at a specific date + pub(crate) started_at: Database, + /// Store the batches containing tasks finished at a specific date + pub(crate) finished_at: Database, +} + +impl BatchQueue { + pub(crate) fn private_clone(&self) -> BatchQueue { + BatchQueue { + all_batches: self.all_batches, + status: self.status, + kind: self.kind, + index_tasks: self.index_tasks, + enqueued_at: self.enqueued_at, + started_at: self.started_at, + finished_at: self.finished_at, + } + } + + pub(super) fn new(env: &Env, wtxn: &mut RwTxn) -> Result { + Ok(Self { + all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?, + status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?, + kind: env.create_database(wtxn, Some(db_name::BATCH_KIND))?, + index_tasks: env.create_database(wtxn, Some(db_name::BATCH_INDEX_TASKS))?, + enqueued_at: env.create_database(wtxn, Some(db_name::BATCH_ENQUEUED_AT))?, + started_at: env.create_database(wtxn, Some(db_name::BATCH_STARTED_AT))?, + finished_at: env.create_database(wtxn, Some(db_name::BATCH_FINISHED_AT))?, + }) + } + + pub(crate) fn all_batch_ids(&self, rtxn: &RoTxn) -> Result { + enum_iterator::all().map(|s| self.get_status(rtxn, s)).union() + } + + pub(crate) fn next_batch_id(&self, rtxn: &RoTxn) -> Result { + Ok(self + .all_batches + .remap_data_type::() + .last(rtxn)? + .map(|(k, _)| k + 1) + .unwrap_or_default()) + } + + pub(crate) fn get_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result> { + Ok(self.all_batches.get(rtxn, &batch_id)?) + } + + /// Returns the whole set of batches that belongs to this index. 
+ pub(crate) fn index_batches(&self, rtxn: &RoTxn, index: &str) -> Result { + Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default()) + } + + pub(crate) fn update_index( + &self, + wtxn: &mut RwTxn, + index: &str, + f: impl Fn(&mut RoaringBitmap), + ) -> Result<()> { + let mut batches = self.index_batches(wtxn, index)?; + f(&mut batches); + if batches.is_empty() { + self.index_tasks.delete(wtxn, index)?; + } else { + self.index_tasks.put(wtxn, index, &batches)?; + } + + Ok(()) + } + + pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result { + Ok(self.status.get(rtxn, &status)?.unwrap_or_default()) + } + + pub(crate) fn put_status( + &self, + wtxn: &mut RwTxn, + status: Status, + bitmap: &RoaringBitmap, + ) -> Result<()> { + Ok(self.status.put(wtxn, &status, bitmap)?) + } + + pub(crate) fn update_status( + &self, + wtxn: &mut RwTxn, + status: Status, + f: impl Fn(&mut RoaringBitmap), + ) -> Result<()> { + let mut tasks = self.get_status(wtxn, status)?; + f(&mut tasks); + self.put_status(wtxn, status, &tasks)?; + + Ok(()) + } + + pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result { + Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default()) + } + + pub(crate) fn put_kind( + &self, + wtxn: &mut RwTxn, + kind: Kind, + bitmap: &RoaringBitmap, + ) -> Result<()> { + Ok(self.kind.put(wtxn, &kind, bitmap)?) 
+ } + + pub(crate) fn update_kind( + &self, + wtxn: &mut RwTxn, + kind: Kind, + f: impl Fn(&mut RoaringBitmap), + ) -> Result<()> { + let mut tasks = self.get_kind(wtxn, kind)?; + f(&mut tasks); + self.put_kind(wtxn, kind, &tasks)?; + Ok(()) + } + + pub(crate) fn write_batch(&self, wtxn: &mut RwTxn, batch: ProcessingBatch) -> Result<()> { + self.all_batches.put( + wtxn, + &batch.uid, + &Batch { + uid: batch.uid, + progress: None, + details: batch.details, + stats: batch.stats, + started_at: batch.started_at, + finished_at: batch.finished_at, + }, + )?; + + for status in batch.statuses { + self.update_status(wtxn, status, |bitmap| { + bitmap.insert(batch.uid); + })?; + } + + for kind in batch.kinds { + self.update_kind(wtxn, kind, |bitmap| { + bitmap.insert(batch.uid); + })?; + } + + for index in batch.indexes { + self.update_index(wtxn, &index, |bitmap| { + bitmap.insert(batch.uid); + })?; + } + + if let Some(enqueued_at) = batch.oldest_enqueued_at { + insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?; + } + if let Some(enqueued_at) = batch.earliest_enqueued_at { + insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?; + } + insert_task_datetime(wtxn, self.started_at, batch.started_at, batch.uid)?; + insert_task_datetime(wtxn, self.finished_at, batch.finished_at.unwrap(), batch.uid)?; + + Ok(()) + } + + /// Convert an iterator to a `Vec` of batches. The batches MUST exist or a + /// `CorruptedTaskQueue` error will be thrown. 
+ pub(crate) fn get_existing_batches( + &self, + rtxn: &RoTxn, + tasks: impl IntoIterator, + processing: &ProcessingTasks, + ) -> Result> { + tasks + .into_iter() + .map(|batch_id| { + if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) { + let mut batch = processing.batch.as_ref().unwrap().to_batch(); + batch.progress = processing.get_progress_view(); + Ok(batch) + } else { + self.get_batch(rtxn, batch_id) + .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) + } + }) + .collect::>() + } +} + +impl Queue { + /// Return the batch ids matched by the given query from the index scheduler's point of view. + pub(crate) fn get_batch_ids( + &self, + rtxn: &RoTxn, + query: &Query, + processing: &ProcessingTasks, + ) -> Result { + let Query { + limit, + from, + reverse, + uids, + batch_uids, + statuses, + types, + index_uids, + canceled_by, + before_enqueued_at, + after_enqueued_at, + before_started_at, + after_started_at, + before_finished_at, + after_finished_at, + } = query; + + let mut batches = self.batches.all_batch_ids(rtxn)?; + if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) { + batches.insert(batch_id); + } + + if let Some(from) = from { + let range = if reverse.unwrap_or_default() { + u32::MIN..*from + } else { + from.saturating_add(1)..u32::MAX + }; + batches.remove_range(range); + } + + if let Some(batch_uids) = &batch_uids { + let batches_uids = RoaringBitmap::from_iter(batch_uids); + batches &= batches_uids; + } + + if let Some(status) = &statuses { + let mut status_batches = RoaringBitmap::new(); + for status in status { + match status { + // special case for Processing batches + Status::Processing => { + if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) { + status_batches.insert(batch_id); + } + } + // Enqueued tasks are not stored in batches + Status::Enqueued => (), + status => status_batches |= &self.batches.get_status(rtxn, *status)?, + }; + } + if !status.contains(&Status::Processing) 
{ + if let Some(ref batch) = processing.batch { + batches.remove(batch.uid); + } + } + batches &= status_batches; + } + + if let Some(task_uids) = &uids { + let mut batches_by_task_uids = RoaringBitmap::new(); + for task_uid in task_uids { + if let Some(task) = self.tasks.get_task(rtxn, *task_uid)? { + if let Some(batch_uid) = task.batch_uid { + batches_by_task_uids.insert(batch_uid); + } + } + } + batches &= batches_by_task_uids; + } + + // There is no database for this query, we must retrieve the task queried by the client and ensure it's valid + if let Some(canceled_by) = &canceled_by { + let mut all_canceled_batches = RoaringBitmap::new(); + for cancel_uid in canceled_by { + if let Some(task) = self.tasks.get_task(rtxn, *cancel_uid)? { + if task.kind.as_kind() == Kind::TaskCancelation + && task.status == Status::Succeeded + { + if let Some(batch_uid) = task.batch_uid { + all_canceled_batches.insert(batch_uid); + } + } + } + } + + // if the canceled_by has been specified but no batch + // matches then we prefer matching zero than all batches. 
+ if all_canceled_batches.is_empty() { + return Ok(RoaringBitmap::new()); + } else { + batches &= all_canceled_batches; + } + } + + if let Some(kind) = &types { + let mut kind_batches = RoaringBitmap::new(); + for kind in kind { + kind_batches |= self.batches.get_kind(rtxn, *kind)?; + if let Some(uid) = processing + .batch + .as_ref() + .and_then(|batch| batch.kinds.contains(kind).then_some(batch.uid)) + { + kind_batches.insert(uid); + } + } + batches &= &kind_batches; + } + + if let Some(index) = &index_uids { + let mut index_batches = RoaringBitmap::new(); + for index in index { + index_batches |= self.batches.index_batches(rtxn, index)?; + if let Some(uid) = processing + .batch + .as_ref() + .and_then(|batch| batch.indexes.contains(index).then_some(batch.uid)) + { + index_batches.insert(uid); + } + } + batches &= &index_batches; + } + + // For the started_at filter, we need to treat the part of the batches that are processing from the part of the + // batches that are not processing. The non-processing ones are filtered normally while the processing ones + // are entirely removed unless the in-memory startedAt variable falls within the date filter. + // Once we have filtered the two subsets, we put them back together and assign it back to `batches`. 
+ batches = { + let (mut filtered_non_processing_batches, mut filtered_processing_batches) = + (&batches - &*processing.processing, &batches & &*processing.processing); + + // special case for Processing batches + // A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds + let mut clear_filtered_processing_batches = + |start: Bound, end: Bound| { + let start = map_bound(start, |b| b.unix_timestamp_nanos()); + let end = map_bound(end, |b| b.unix_timestamp_nanos()); + let is_within_dates = RangeBounds::contains( + &(start, end), + &processing + .batch + .as_ref() + .map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at) + .unix_timestamp_nanos(), + ); + if !is_within_dates { + filtered_processing_batches.clear(); + } + }; + match (after_started_at, before_started_at) { + (None, None) => (), + (None, Some(before)) => { + clear_filtered_processing_batches(Bound::Unbounded, Bound::Excluded(*before)) + } + (Some(after), None) => { + clear_filtered_processing_batches(Bound::Excluded(*after), Bound::Unbounded) + } + (Some(after), Some(before)) => clear_filtered_processing_batches( + Bound::Excluded(*after), + Bound::Excluded(*before), + ), + }; + + keep_ids_within_datetimes( + rtxn, + &mut filtered_non_processing_batches, + self.batches.started_at, + *after_started_at, + *before_started_at, + )?; + filtered_non_processing_batches | filtered_processing_batches + }; + + keep_ids_within_datetimes( + rtxn, + &mut batches, + self.batches.enqueued_at, + *after_enqueued_at, + *before_enqueued_at, + )?; + + keep_ids_within_datetimes( + rtxn, + &mut batches, + self.batches.finished_at, + *after_finished_at, + *before_finished_at, + )?; + + if let Some(limit) = limit { + batches = if query.reverse.unwrap_or_default() { + batches.into_iter().take(*limit as usize).collect() + } else { + batches.into_iter().rev().take(*limit as usize).collect() + }; + } + + Ok(batches) + } + + /// Return the batch ids matching the query 
along with the total number of batches + /// by ignoring the from and limit parameters from the user's point of view. + /// + /// There are two differences between an internal query and a query executed by + /// the user. + /// + /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated + /// with many indexes internally. + /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. + pub(crate) fn get_batch_ids_from_authorized_indexes( + &self, + rtxn: &RoTxn, + query: &Query, + filters: &meilisearch_auth::AuthFilter, + processing: &ProcessingTasks, + ) -> Result<(RoaringBitmap, u64)> { + // compute all batches matching the filter by ignoring the limits, to find the number of batches matching + // the filter. + // As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares + // us from modifying the underlying implementation, and the performance remains sufficient. + // Should this change, we would modify `get_batch_ids` to directly return the number of matching batches. + let total_batches = + self.get_batch_ids(rtxn, &query.clone().without_limits(), processing)?; + let mut batches = self.get_batch_ids(rtxn, query, processing)?; + + // If the query contains a list of index uid or there is a finite list of authorized indexes, + // then we must exclude all the batches that only contains tasks associated to multiple indexes. + // This works because we don't autobatch tasks associated to multiple indexes with tasks associated + // to a single index. e.g: IndexSwap cannot be batched with IndexCreation. 
+ if query.index_uids.is_some() || !filters.all_indexes_authorized() { + for kind in enum_iterator::all::().filter(|kind| !kind.related_to_one_index()) { + batches -= self.tasks.get_kind(rtxn, kind)?; + if let Some(batch) = processing.batch.as_ref() { + if batch.kinds.contains(&kind) { + batches.remove(batch.uid); + } + } + } + } + + // Any batch that is internally associated with at least one authorized index + // must be returned. + if !filters.all_indexes_authorized() { + let mut valid_indexes = RoaringBitmap::new(); + let mut forbidden_indexes = RoaringBitmap::new(); + + let all_indexes_iter = self.batches.index_tasks.iter(rtxn)?; + for result in all_indexes_iter { + let (index, index_tasks) = result?; + if filters.is_index_authorized(index) { + valid_indexes |= index_tasks; + } else { + forbidden_indexes |= index_tasks; + } + } + if let Some(batch) = processing.batch.as_ref() { + for index in &batch.indexes { + if filters.is_index_authorized(index) { + valid_indexes.insert(batch.uid); + } else { + forbidden_indexes.insert(batch.uid); + } + } + } + + // If a batch had ONE valid task then it should be returned + let invalid_batches = forbidden_indexes - valid_indexes; + + batches -= invalid_batches; + } + + Ok((batches, total_batches.len())) + } + + pub(crate) fn get_batches_from_authorized_indexes( + &self, + rtxn: &RoTxn, + query: &Query, + filters: &meilisearch_auth::AuthFilter, + processing: &ProcessingTasks, + ) -> Result<(Vec, u64)> { + let (batches, total) = + self.get_batch_ids_from_authorized_indexes(rtxn, query, filters, processing)?; + let batches = if query.reverse.unwrap_or_default() { + Box::new(batches.into_iter()) as Box> + } else { + Box::new(batches.into_iter().rev()) as Box> + }; + + let batches = self.batches.get_existing_batches( + rtxn, + batches.take(query.limit.unwrap_or(u32::MAX) as usize), + processing, + )?; + + Ok((batches, total)) + } +} diff --git a/crates/index-scheduler/src/queue/batches_test.rs 
b/crates/index-scheduler/src/queue/batches_test.rs new file mode 100644 index 000000000..aa84cdaf0 --- /dev/null +++ b/crates/index-scheduler/src/queue/batches_test.rs @@ -0,0 +1,473 @@ +use meili_snap::snapshot; +use meilisearch_auth::AuthFilter; +use meilisearch_types::index_uid_pattern::IndexUidPattern; +use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status}; +use time::{Duration, OffsetDateTime}; + +use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler}; +use crate::test_utils::Breakpoint::*; +use crate::test_utils::{index_creation_task, FailureLocation}; +use crate::{IndexScheduler, Query}; + +#[test] +fn query_batches_from_and_limit() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let kind = index_creation_task("doggo", "bone"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + let kind = index_creation_task("whalo", "plankton"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + let kind = index_creation_task("catto", "his_own_vomit"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); + + handle.advance_n_successful_batches(3); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks"); + + let proc = index_scheduler.processing_tasks.read().unwrap().clone(); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let query = Query { limit: Some(0), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[]"); + + let query = Query { limit: Some(1), ..Default::default() }; + let (batches, _) = 
index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[2,]"); + + let query = Query { limit: Some(2), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[1,2,]"); + + let query = Query { from: Some(1), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); + + let query = Query { from: Some(2), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[0,1,2,]"); + + let query = Query { from: Some(1), limit: Some(1), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[1,]"); + + let query = Query { from: Some(1), limit: Some(2), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); +} + +#[test] +fn query_batches_simple() { + let start_time = OffsetDateTime::now_utc(); + + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("whalo", 
"fish"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); + + handle.advance_till([Start, BatchCreated]); + + let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() }; + let (mut batches, _) = index_scheduler + .get_batches_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + assert_eq!(batches.len(), 1); + batches[0].started_at = OffsetDateTime::UNIX_EPOCH; + // Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689 + let batch = serde_json::to_string_pretty(&batches[0]).unwrap(); + snapshot!(batch, @r#" + { + "uid": 0, + "details": { + "primaryKey": "mouse" + }, + "stats": { + "totalNbTasks": 1, + "status": { + "processing": 1 + }, + "types": { + "indexCreation": 1 + }, + "indexUids": { + "catto": 1 + } + }, + "startedAt": "1970-01-01T00:00:00Z", + "finishedAt": null + } + "#); + + let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[]"); // The batches don't contains any enqueued tasks + + let query = + Query { statuses: Some(vec![Status::Enqueued, Status::Processing]), ..Default::default() }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[0,]"); // both enqueued and processing tasks in the first tick + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Processing]), + after_started_at: Some(start_time), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both enqueued and processing tasks in the first tick, but limited to 
those with a started_at + // that comes after the start of the test, which should excludes the enqueued tasks + snapshot!(snapshot_bitmap(&batches), @"[0,]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Processing]), + before_started_at: Some(start_time), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both enqueued and processing tasks in the first tick, but limited to those with a started_at + // that comes before the start of the test, which should excludes all of them + snapshot!(snapshot_bitmap(&batches), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Processing]), + after_started_at: Some(start_time), + before_started_at: Some(start_time + Duration::minutes(1)), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both enqueued and processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the test and before one minute after the start of the test, + // which should exclude the enqueued tasks and include the only processing task + snapshot!(snapshot_bitmap(&batches), @"[0,]"); + + handle.advance_till([ + InsideProcessBatch, + InsideProcessBatch, + ProcessBatchSucceeded, + AfterProcessing, + Start, + BatchCreated, + ]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-advancing-a-bit"); + + let second_start_time = OffsetDateTime::now_utc(); + + let query = Query { + statuses: Some(vec![Status::Succeeded, Status::Processing]), + after_started_at: Some(start_time), + before_started_at: Some(start_time + Duration::minutes(1)), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both succeeded and processing tasks in the 
first tick, but limited to those with a started_at + // that comes after the start of the test and before one minute after the start of the test, + // which should include all tasks + snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); + + let query = Query { + statuses: Some(vec![Status::Succeeded, Status::Processing]), + before_started_at: Some(start_time), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both succeeded and processing tasks in the first tick, but limited to those with a started_at + // that comes before the start of the test, which should exclude all tasks + snapshot!(snapshot_bitmap(&batches), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both succeeded and processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the second part of the test and before one minute after the + // second start of the test, which should exclude all tasks + snapshot!(snapshot_bitmap(&batches), @"[]"); + + // now we make one more batch, the started_at field of the new tasks will be past `second_start_time` + handle.advance_till([ + InsideProcessBatch, + InsideProcessBatch, + ProcessBatchSucceeded, + AfterProcessing, + Start, + BatchCreated, + ]); + + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // we run the same query to verify that, and indeed find that the last task is matched + snapshot!(snapshot_bitmap(&batches), @"[2,]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, 
Status::Succeeded, Status::Processing]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // enqueued, succeeded, or processing tasks started after the second part of the test, should + // again only return the last task + snapshot!(snapshot_bitmap(&batches), @"[2,]"); + + handle.advance_till([ProcessBatchFailed, AfterProcessing]); + + // now the last task should have failed + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end"); + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // so running the last query should return nothing + snapshot!(snapshot_bitmap(&batches), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // but the same query on failed tasks should return the last task + snapshot!(snapshot_bitmap(&batches), @"[2,]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // but the same query on failed tasks should return the last task + snapshot!(snapshot_bitmap(&batches), @"[2,]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + uids: Some(vec![1]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + 
..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // same query but with an invalid uid + snapshot!(snapshot_bitmap(&batches), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + uids: Some(vec![2]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // same query but with a valid uid + snapshot!(snapshot_bitmap(&batches), @"[2,]"); +} + +#[test] +fn query_batches_special_rules() { + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], + }; + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }], + }; + let _task = index_scheduler.register(kind, None, false).unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); + + handle.advance_till([Start, BatchCreated]); + + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap().clone(); + + let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + // only the first task associated with 
catto is returned, the indexSwap tasks are excluded! + snapshot!(snapshot_bitmap(&batches), @"[0,]"); + + let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes( + &rtxn, + &query, + &AuthFilter::with_allowed_indexes( + vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), + ), + &proc, + ) + .unwrap(); + // we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks + // associated with doggo -> empty result + snapshot!(snapshot_bitmap(&batches), @"[]"); + + drop(rtxn); + // We're going to advance and process all the batches for the next query to actually hit the db + handle.advance_till([ + InsideProcessBatch, + InsideProcessBatch, + ProcessBatchSucceeded, + AfterProcessing, + ]); + handle.advance_one_successful_batch(); + handle.advance_n_failed_batches(2); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-processing-everything"); + let rtxn = index_scheduler.env.read_txn().unwrap(); + + let query = Query::default(); + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes( + &rtxn, + &query, + &AuthFilter::with_allowed_indexes( + vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), + ), + &proc, + ) + .unwrap(); + // we asked for all the tasks, but we are only authorized to retrieve the doggo tasks + // -> only the index creation of doggo should be returned + snapshot!(snapshot_bitmap(&batches), @"[1,]"); + + let query = Query::default(); + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes( + &rtxn, + &query, + &AuthFilter::with_allowed_indexes( + vec![ + IndexUidPattern::new_unchecked("catto"), + IndexUidPattern::new_unchecked("doggo"), + ] + .into_iter() + .collect(), + ), + &proc, + ) + .unwrap(); + // we asked for all the tasks, but we are only authorized to retrieve the doggo and 
catto tasks + // -> all tasks except the swap of catto with whalo are returned + snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); + + let query = Query::default(); + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + // we asked for all the tasks with all index authorized -> all tasks returned + snapshot!(snapshot_bitmap(&batches), @"[0,1,2,3,]"); +} + +#[test] +fn query_batches_canceled_by() { + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + let _ = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _ = index_scheduler.register(kind, None, false).unwrap(); + let kind = KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], + }; + let _task = index_scheduler.register(kind, None, false).unwrap(); + + handle.advance_n_successful_batches(1); + let kind = KindWithContent::TaskCancelation { + query: "test_query".to_string(), + tasks: [0, 1, 2, 3].into_iter().collect(), + }; + let task_cancelation = index_scheduler.register(kind, None, false).unwrap(); + handle.advance_n_successful_batches(1); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); + + let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // The batch zero was the index creation task, the 1 is the task cancellation + snapshot!(snapshot_bitmap(&batches), @"[1,]"); + + let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes( + &query, + &AuthFilter::with_allowed_indexes( + 
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), + ), + ) + .unwrap(); + // Return only 1 because the user is not authorized to see task 2 + snapshot!(snapshot_bitmap(&batches), @"[1,]"); +} diff --git a/crates/index-scheduler/src/queue/mod.rs b/crates/index-scheduler/src/queue/mod.rs new file mode 100644 index 000000000..4921d05e6 --- /dev/null +++ b/crates/index-scheduler/src/queue/mod.rs @@ -0,0 +1,379 @@ +mod batches; +#[cfg(test)] +mod batches_test; +mod tasks; +#[cfg(test)] +mod tasks_test; +#[cfg(test)] +mod test; + +use std::collections::BTreeMap; +use std::time::Duration; + +use file_store::FileStore; +use meilisearch_types::batches::BatchId; +use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; +use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32}; +use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; +use roaring::RoaringBitmap; +use time::format_description::well_known::Rfc3339; +use time::OffsetDateTime; +use uuid::Uuid; + +use self::batches::BatchQueue; +use self::tasks::TaskQueue; +use crate::processing::ProcessingTasks; +use crate::utils::{ + check_index_swap_validity, filter_out_references_to_newer_tasks, ProcessingBatch, +}; +use crate::{Error, IndexSchedulerOptions, Result, TaskId}; + +/// Database const names for the `IndexScheduler`. +mod db_name { + pub const BATCH_TO_TASKS_MAPPING: &str = "batch-to-tasks-mapping"; +} + +/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`]. +/// +/// An empty/default query (where each field is set to `None`) matches all tasks. +/// Each non-null field restricts the set of tasks further. +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct Query { + /// The maximum number of tasks to be matched + pub limit: Option<u32>, + /// The [task id](`meilisearch_types::tasks::Task::uid`) to start listing from; with the default newest-first order it is the highest id that can be matched + pub from: Option<u32>, + /// The order used to return the tasks. By default the newest tasks are returned first and the boolean is `false`. 
+ pub reverse: Option<bool>, + /// The [task ids](`meilisearch_types::tasks::Task::uid`) to be matched + pub uids: Option<Vec<TaskId>>, + /// The [batch ids](`meilisearch_types::batches::Batch::uid`) to be matched + pub batch_uids: Option<Vec<BatchId>>, + /// The allowed [statuses](`meilisearch_types::tasks::Task::status`) of the matched tasks + pub statuses: Option<Vec<Status>>, + /// The allowed [kinds](meilisearch_types::tasks::Kind) of the matched tasks. + /// + /// The kind of a task is given by: + /// ``` + /// # use meilisearch_types::tasks::{Task, Kind}; + /// # fn doc_func(task: Task) -> Kind { + /// task.kind.as_kind() + /// # } + /// ``` + pub types: Option<Vec<Kind>>, + /// The allowed [index ids](meilisearch_types::tasks::Task::index_uid) of the matched tasks + pub index_uids: Option<Vec<String>>, + /// The [task ids](`meilisearch_types::tasks::Task::uid`) of the [`TaskCancelation`](meilisearch_types::tasks::Task::Kind::TaskCancelation) tasks + /// that canceled the matched tasks. + pub canceled_by: Option<Vec<TaskId>>, + /// Exclusive upper bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field. + pub before_enqueued_at: Option<OffsetDateTime>, + /// Exclusive lower bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field. + pub after_enqueued_at: Option<OffsetDateTime>, + /// Exclusive upper bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field. + pub before_started_at: Option<OffsetDateTime>, + /// Exclusive lower bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field. + pub after_started_at: Option<OffsetDateTime>, + /// Exclusive upper bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field. + pub before_finished_at: Option<OffsetDateTime>, + /// Exclusive lower bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field. 
+ pub after_finished_at: Option<OffsetDateTime>, +} + +impl Query { + /// Return `true` if every field of the query is set to `None`, such that the query + /// matches all tasks. + pub fn is_empty(&self) -> bool { + matches!( + self, + Query { + limit: None, + from: None, + reverse: None, + uids: None, + batch_uids: None, + statuses: None, + types: None, + index_uids: None, + canceled_by: None, + before_enqueued_at: None, + after_enqueued_at: None, + before_started_at: None, + after_started_at: None, + before_finished_at: None, + after_finished_at: None, + } + ) + } + + /// Add an [index id](meilisearch_types::tasks::Task::index_uid) to the list of permitted indexes. + pub fn with_index(self, index_uid: String) -> Self { + let mut index_vec = self.index_uids.unwrap_or_default(); + index_vec.push(index_uid); + Self { index_uids: Some(index_vec), ..self } + } + + /// Removes the `from` and `limit` restrictions from the query. + /// Useful to get the total number of tasks matching a filter. + pub fn without_limits(self) -> Self { + Query { limit: None, from: None, ..self } + } +} + +/// Structure which holds the task queue, the batch queue, the mapping between +/// batches and their tasks, and the store of the files referenced by the tasks. +pub struct Queue { + pub(crate) tasks: tasks::TaskQueue, + pub(crate) batches: batches::BatchQueue, + + /// Matches a batch id with the associated task ids. + pub(crate) batch_to_tasks_mapping: Database<BEU32, CboRoaringBitmapCodec>, + + /// The list of files referenced by the tasks. + pub(crate) file_store: FileStore, + + /// The max number of tasks allowed before the scheduler starts to delete + /// the finished tasks automatically. + pub(crate) max_number_of_tasks: usize, +} + +impl Queue { + pub(crate) fn private_clone(&self) -> Queue { + Queue { + tasks: self.tasks.private_clone(), + batches: self.batches.private_clone(), + batch_to_tasks_mapping: self.batch_to_tasks_mapping, + file_store: self.file_store.clone(), + max_number_of_tasks: self.max_number_of_tasks, + } + } + + /// Create the queue: opens the update file store and the databases it relies on. 
+ pub(crate) fn new( + env: &Env, + wtxn: &mut RwTxn, + options: &IndexSchedulerOptions, + ) -> Result { + // allow unreachable_code to get rids of the warning in the case of a test build. + Ok(Self { + file_store: FileStore::new(&options.update_file_path)?, + batch_to_tasks_mapping: env + .create_database(wtxn, Some(db_name::BATCH_TO_TASKS_MAPPING))?, + tasks: TaskQueue::new(env, wtxn)?, + batches: BatchQueue::new(env, wtxn)?, + max_number_of_tasks: options.max_number_of_tasks, + }) + } + + /// Returns the whole set of tasks that belongs to this batch. + pub(crate) fn tasks_in_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result { + Ok(self.batch_to_tasks_mapping.get(rtxn, &batch_id)?.unwrap_or_default()) + } + + /// Convert an iterator to a `Vec` of tasks and edit the `ProcessingBatch` to add the given tasks. + /// + /// The tasks MUST exist, or a `CorruptedTaskQueue` error will be thrown. + pub(crate) fn get_existing_tasks_for_processing_batch( + &self, + rtxn: &RoTxn, + processing_batch: &mut ProcessingBatch, + tasks: impl IntoIterator, + ) -> Result> { + tasks + .into_iter() + .map(|task_id| { + let mut task = self + .tasks + .get_task(rtxn, task_id) + .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)); + processing_batch.processing(&mut task); + task + }) + .collect::>() + } + + pub(crate) fn write_batch( + &self, + wtxn: &mut RwTxn, + batch: ProcessingBatch, + tasks: &RoaringBitmap, + ) -> Result<()> { + self.batch_to_tasks_mapping.put(wtxn, &batch.uid, tasks)?; + self.batches.write_batch(wtxn, batch)?; + Ok(()) + } + + pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> { + match task.content_uuid() { + Some(content_file) => self.delete_update_file(content_file), + None => Ok(()), + } + } + + /// Delete a file from the index scheduler. + /// + /// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method. 
+ pub fn delete_update_file(&self, uuid: Uuid) -> Result<()> { + Ok(self.file_store.delete(uuid)?) + } + + /// Create a file and register it in the index scheduler. + /// + /// The returned file and uuid can be used to associate + /// some data to a task. The file will be kept until + /// the task has been fully processed. + pub fn create_update_file(&self, dry_run: bool) -> Result<(Uuid, file_store::File)> { + if dry_run { + Ok((Uuid::nil(), file_store::File::dry_file()?)) + } else { + Ok(self.file_store.new_update()?) + } + } + + #[cfg(test)] + pub fn create_update_file_with_uuid(&self, uuid: u128) -> Result<(Uuid, file_store::File)> { + Ok(self.file_store.new_update_with_uuid(uuid)?) + } + + /// The size on disk taken by all the update files contained in the `IndexScheduler`, in bytes. + pub fn compute_update_file_size(&self) -> Result<u64> { + Ok(self.file_store.compute_total_size()?) + } + + pub fn register( + &self, + wtxn: &mut RwTxn, + kind: &KindWithContent, + task_id: Option<TaskId>, + dry_run: bool, + ) -> Result<Task> { + let next_task_id = self.tasks.next_task_id(wtxn)?; + + if let Some(uid) = task_id { + if uid < next_task_id { + return Err(Error::BadTaskId { received: uid, expected: next_task_id }); + } + } + + let mut task = Task { + uid: task_id.unwrap_or(next_task_id), + // The batch is defined once we start processing the task + batch_uid: None, + enqueued_at: OffsetDateTime::now_utc(), + started_at: None, + finished_at: None, + error: None, + canceled_by: None, + details: kind.default_details(), + status: Status::Enqueued, + kind: kind.clone(), + }; + // For deletion and cancelation tasks, we want to make extra sure that they + // don't attempt to delete/cancel tasks that are newer than themselves. + filter_out_references_to_newer_tasks(&mut task); + // If the registered task is an index swap task, verify that it is well-formed + // (that it does not contain duplicate indexes). 
+ check_index_swap_validity(&task)?; + + // At this point the task is going to be registered and no further checks will be done + if dry_run { + return Ok(task); + } + + // Get rid of the mutability. + let task = task; + self.tasks.register(wtxn, &task)?; + + Ok(task) + } + + /// Register a task to cleanup the task queue if needed: the oldest finished tasks (at most 100 000) are scheduled for deletion + pub fn cleanup_task_queue(&self, wtxn: &mut RwTxn) -> Result<()> { + let nb_tasks = self.tasks.all_task_ids(wtxn)?.len(); + // if we have fewer tasks than the configured maximum everything is fine + if nb_tasks < self.max_number_of_tasks as u64 { + return Ok(()); + } + + let finished = self.tasks.status.get(wtxn, &Status::Succeeded)?.unwrap_or_default() + | self.tasks.status.get(wtxn, &Status::Failed)?.unwrap_or_default() + | self.tasks.status.get(wtxn, &Status::Canceled)?.unwrap_or_default(); + + let to_delete = RoaringBitmap::from_iter(finished.into_iter().take(100_000)); + + // /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete + // the deletion tasks we enqueued ourselves. + if to_delete.len() < 2 { + tracing::warn!("The task queue is almost full, but no task can be deleted yet."); + // the only thing we can do is hope that the user tasks are going to finish + return Ok(()); + } + + tracing::info!( + "The task queue is almost full. Deleting the oldest {} finished tasks.", + to_delete.len() + ); + + // it's safe to unwrap here because we checked the len above + let newest_task_id = to_delete.iter().last().unwrap(); + let last_task_to_delete = + self.tasks.get_task(wtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?; + + // increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date. 
+ let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1); + + self.register( + wtxn, + &KindWithContent::TaskDeletion { + query: format!( + "?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled", + delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?, + ), + tasks: to_delete, + }, + None, + false, + )?; + + Ok(()) + } + + pub fn get_stats( + &self, + rtxn: &RoTxn, + processing: &ProcessingTasks, + ) -> Result<BTreeMap<String, BTreeMap<String, u64>>> { + let mut res = BTreeMap::new(); + let processing_tasks = processing.processing.len(); + + res.insert( + "statuses".to_string(), + enum_iterator::all::<Status>() + .map(|s| { + let tasks = self.tasks.get_status(rtxn, s)?.len(); + match s { + Status::Enqueued => Ok((s.to_string(), tasks - processing_tasks)), + Status::Processing => Ok((s.to_string(), processing_tasks)), + s => Ok((s.to_string(), tasks)), + } + }) + .collect::<Result<BTreeMap<String, u64>>>()?, + ); + res.insert( + "types".to_string(), + enum_iterator::all::<Kind>() + .map(|s| Ok((s.to_string(), self.tasks.get_kind(rtxn, s)?.len()))) + .collect::<Result<BTreeMap<String, u64>>>()?, + ); + res.insert( + "indexes".to_string(), + self.tasks + .index_tasks + .iter(rtxn)? 
+ .map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?)) + .collect::<Result<BTreeMap<String, u64>>>()?, + ); + + Ok(res) + } +} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_canceled_by/start.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_canceled_by/start.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_canceled_by/start.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_canceled_by/start.snap index ea3a75e8f..410f46929 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_canceled_by/start.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_canceled_by/start.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/processed_all_tasks.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/processed_all_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/processed_all_tasks.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/processed_all_tasks.snap index 9f5c7e4ad..27a641b59 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/processed_all_tasks.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/processed_all_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_first_task.snap 
b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_first_task.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_first_task.snap index 64503a754..74c4c4a33 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_first_task.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_second_task.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_second_task.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_second_task.snap index 171f6dab4..411e82ea0 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_second_task.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_third_task.snap 
b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_third_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_third_task.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_third_task.snap index f811b99a6..4c76db95e 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_third_task.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_third_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/after-advancing-a-bit.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/after-advancing-a-bit.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/after-advancing-a-bit.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/after-advancing-a-bit.snap index bf5d0528c..6d899b270 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/after-advancing-a-bit.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/after-advancing-a-bit.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/end.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/end.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/end.snap rename to 
crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/end.snap index cbb780494..314f5b067 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/end.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/end.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/start.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/start.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/start.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/start.snap index 78a6c4228..6dc897dfa 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/start.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/start.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_special_rules/after-processing-everything.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/after-processing-everything.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_special_rules/after-processing-everything.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/after-processing-everything.snap index 31a08e88b..f40322ac0 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_special_rules/after-processing-everything.snap +++ 
b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/after-processing-everything.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_special_rules/start.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/start.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_special_rules/start.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/start.snap index 30f62c526..1184c197f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_special_rules/start.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/start.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_canceled_by/start.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_canceled_by/start.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_canceled_by/start.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_canceled_by/start.snap index ea3a75e8f..165d7c4fe 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_canceled_by/start.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_canceled_by/start.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/processed_all_tasks.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/processed_all_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/processed_all_tasks.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/processed_all_tasks.snap index 9f5c7e4ad..079972755 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/processed_all_tasks.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/processed_all_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_first_task.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_first_task.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_first_task.snap index 64503a754..4f9ffb209 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_first_task.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_second_task.snap 
b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_second_task.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_second_task.snap index 171f6dab4..eb6b0e7ec 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_second_task.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_third_task.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_third_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_third_task.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_third_task.snap index f811b99a6..181f0308c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_third_task.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_third_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/end.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/end.snap similarity index 98% rename from 
crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/end.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/end.snap index cbb780494..3ed017700 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/end.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/end.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/start.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/start.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/start.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/start.snap index 78a6c4228..268f463aa 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/start.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/start.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_special_rules/start.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_special_rules/start.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_special_rules/start.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_special_rules/start.snap index 30f62c526..60c041c05 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_special_rules/start.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_special_rules/start.snap @@ -1,5 +1,5 @@ --- -source: 
crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/register/everything_is_successfully_registered.snap b/crates/index-scheduler/src/queue/snapshots/test.rs/register/everything_is_successfully_registered.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/register/everything_is_successfully_registered.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/register/everything_is_successfully_registered.snap index 8341d947d..e4d9af541 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/register/everything_is_successfully_registered.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/register/everything_is_successfully_registered.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap similarity index 94% rename from crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap index 03213fbb0..30e8e17a8 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs +snapshot_kind: text --- [ { diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap similarity index 91% rename from crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap index cc38f69a0..76f88a13f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap index 3400d8950..4e3fb5439 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap 
b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap similarity index 95% rename from crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap index ab4210bed..4cabce94b 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap similarity index 96% rename from crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap index 8b69b1cc2..5565994cb 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap b/crates/index-scheduler/src/queue/snapshots/test.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap similarity index 96% rename from 
crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap index 8b69b1cc2..5565994cb 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap b/crates/index-scheduler/src/queue/snapshots/test.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap similarity index 96% rename from crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap index 8b69b1cc2..5565994cb 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/queue/tasks.rs b/crates/index-scheduler/src/queue/tasks.rs new file mode 100644 index 000000000..c88192e17 --- /dev/null +++ b/crates/index-scheduler/src/queue/tasks.rs @@ -0,0 +1,518 @@ +use std::ops::{Bound, RangeBounds}; + +use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str}; +use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; +use 
meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; +use meilisearch_types::tasks::{Kind, Status, Task}; +use roaring::{MultiOps, RoaringBitmap}; +use time::OffsetDateTime; + +use super::{Query, Queue}; +use crate::processing::ProcessingTasks; +use crate::utils::{self, insert_task_datetime, keep_ids_within_datetimes, map_bound}; +use crate::{Error, Result, TaskId, BEI128}; + +/// Database const names for the `IndexScheduler`. +mod db_name { + pub const ALL_TASKS: &str = "all-tasks"; + pub const STATUS: &str = "status"; + pub const KIND: &str = "kind"; + pub const INDEX_TASKS: &str = "index-tasks"; + pub const CANCELED_BY: &str = "canceled_by"; + pub const ENQUEUED_AT: &str = "enqueued-at"; + pub const STARTED_AT: &str = "started-at"; + pub const FINISHED_AT: &str = "finished-at"; +} + +pub struct TaskQueue { + /// The main database, it contains all the tasks accessible by their Id. + pub(crate) all_tasks: Database>, + + /// All the tasks ids grouped by their status. + // TODO we should not be able to serialize a `Status::Processing` in this database. + pub(crate) status: Database, RoaringBitmapCodec>, + /// All the tasks ids grouped by their kind. + pub(crate) kind: Database, RoaringBitmapCodec>, + /// Store the tasks associated to an index. 
+ pub(crate) index_tasks: Database, + /// Store the tasks that were canceled by a task uid + pub(crate) canceled_by: Database, + /// Store the task ids of tasks which were enqueued at a specific date + pub(crate) enqueued_at: Database, + /// Store the task ids of finished tasks which started being processed at a specific date + pub(crate) started_at: Database, + /// Store the task ids of tasks which finished at a specific date + pub(crate) finished_at: Database, +} + +impl TaskQueue { + pub(crate) fn private_clone(&self) -> TaskQueue { + TaskQueue { + all_tasks: self.all_tasks, + status: self.status, + kind: self.kind, + index_tasks: self.index_tasks, + canceled_by: self.canceled_by, + enqueued_at: self.enqueued_at, + started_at: self.started_at, + finished_at: self.finished_at, + } + } + + pub(super) fn new(env: &Env, wtxn: &mut RwTxn) -> Result { + Ok(Self { + all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?, + status: env.create_database(wtxn, Some(db_name::STATUS))?, + kind: env.create_database(wtxn, Some(db_name::KIND))?, + index_tasks: env.create_database(wtxn, Some(db_name::INDEX_TASKS))?, + canceled_by: env.create_database(wtxn, Some(db_name::CANCELED_BY))?, + enqueued_at: env.create_database(wtxn, Some(db_name::ENQUEUED_AT))?, + started_at: env.create_database(wtxn, Some(db_name::STARTED_AT))?, + finished_at: env.create_database(wtxn, Some(db_name::FINISHED_AT))?, + }) + } + + pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result> { + Ok(self.all_tasks.remap_data_type::().last(rtxn)?.map(|(k, _)| k + 1)) + } + + pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result { + Ok(self.last_task_id(rtxn)?.unwrap_or_default()) + } + + pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result { + enum_iterator::all().map(|s| self.get_status(rtxn, s)).union() + } + + pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result> { + Ok(self.all_tasks.get(rtxn, &task_id)?) 
+ } + + pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> { + let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?; + + debug_assert!(old_task != *task); + debug_assert_eq!(old_task.uid, task.uid); + debug_assert!(old_task.batch_uid.is_none() && task.batch_uid.is_some()); + debug_assert!( + old_task.batch_uid.is_none() && task.batch_uid.is_some(), + "\n==> old: {old_task:?}\n==> new: {task:?}" + ); + + if old_task.status != task.status { + self.update_status(wtxn, old_task.status, |bitmap| { + bitmap.remove(task.uid); + })?; + self.update_status(wtxn, task.status, |bitmap| { + bitmap.insert(task.uid); + })?; + } + + if old_task.kind.as_kind() != task.kind.as_kind() { + self.update_kind(wtxn, old_task.kind.as_kind(), |bitmap| { + bitmap.remove(task.uid); + })?; + self.update_kind(wtxn, task.kind.as_kind(), |bitmap| { + bitmap.insert(task.uid); + })?; + } + + assert_eq!( + old_task.enqueued_at, task.enqueued_at, + "Cannot update a task's enqueued_at time" + ); + if old_task.started_at != task.started_at { + assert!(old_task.started_at.is_none(), "Cannot update a task's started_at time"); + if let Some(started_at) = task.started_at { + insert_task_datetime(wtxn, self.started_at, started_at, task.uid)?; + } + } + if old_task.finished_at != task.finished_at { + assert!(old_task.finished_at.is_none(), "Cannot update a task's finished_at time"); + if let Some(finished_at) = task.finished_at { + insert_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?; + } + } + + self.all_tasks.put(wtxn, &task.uid, task)?; + Ok(()) + } + + /// Returns the whole set of tasks that belongs to this index. 
+ pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result { + Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default()) + } + + pub(crate) fn update_index( + &self, + wtxn: &mut RwTxn, + index: &str, + f: impl Fn(&mut RoaringBitmap), + ) -> Result<()> { + let mut tasks = self.index_tasks(wtxn, index)?; + f(&mut tasks); + if tasks.is_empty() { + self.index_tasks.delete(wtxn, index)?; + } else { + self.index_tasks.put(wtxn, index, &tasks)?; + } + + Ok(()) + } + + pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result { + Ok(self.status.get(rtxn, &status)?.unwrap_or_default()) + } + + pub(crate) fn put_status( + &self, + wtxn: &mut RwTxn, + status: Status, + bitmap: &RoaringBitmap, + ) -> Result<()> { + Ok(self.status.put(wtxn, &status, bitmap)?) + } + + pub(crate) fn update_status( + &self, + wtxn: &mut RwTxn, + status: Status, + f: impl Fn(&mut RoaringBitmap), + ) -> Result<()> { + let mut tasks = self.get_status(wtxn, status)?; + f(&mut tasks); + self.put_status(wtxn, status, &tasks)?; + + Ok(()) + } + + pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result { + Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default()) + } + + pub(crate) fn put_kind( + &self, + wtxn: &mut RwTxn, + kind: Kind, + bitmap: &RoaringBitmap, + ) -> Result<()> { + Ok(self.kind.put(wtxn, &kind, bitmap)?) + } + + pub(crate) fn update_kind( + &self, + wtxn: &mut RwTxn, + kind: Kind, + f: impl Fn(&mut RoaringBitmap), + ) -> Result<()> { + let mut tasks = self.get_kind(wtxn, kind)?; + f(&mut tasks); + self.put_kind(wtxn, kind, &tasks)?; + + Ok(()) + } + + /// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a + /// `CorruptedTaskQueue` error will be thrown. 
+ pub(crate) fn get_existing_tasks( + &self, + rtxn: &RoTxn, + tasks: impl IntoIterator, + ) -> Result> { + tasks + .into_iter() + .map(|task_id| { + self.get_task(rtxn, task_id).and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) + }) + .collect::>() + } + + pub(crate) fn register(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> { + self.all_tasks.put(wtxn, &task.uid, task)?; + + for index in task.indexes() { + self.update_index(wtxn, index, |bitmap| { + bitmap.insert(task.uid); + })?; + } + + self.update_status(wtxn, Status::Enqueued, |bitmap| { + bitmap.insert(task.uid); + })?; + + self.update_kind(wtxn, task.kind.as_kind(), |bitmap| { + bitmap.insert(task.uid); + })?; + + utils::insert_task_datetime(wtxn, self.enqueued_at, task.enqueued_at, task.uid)?; + + Ok(()) + } +} + +impl Queue { + /// Return the task ids matched by the given query from the index scheduler's point of view. + pub(crate) fn get_task_ids( + &self, + rtxn: &RoTxn, + query: &Query, + processing_tasks: &ProcessingTasks, + ) -> Result { + let ProcessingTasks { batch: processing_batch, processing: processing_tasks, progress: _ } = + processing_tasks; + let Query { + limit, + from, + reverse, + uids, + batch_uids, + statuses, + types, + index_uids, + canceled_by, + before_enqueued_at, + after_enqueued_at, + before_started_at, + after_started_at, + before_finished_at, + after_finished_at, + } = query; + + let mut tasks = self.tasks.all_task_ids(rtxn)?; + + if let Some(from) = from { + let range = if reverse.unwrap_or_default() { + u32::MIN..*from + } else { + from.saturating_add(1)..u32::MAX + }; + tasks.remove_range(range); + } + + if let Some(batch_uids) = batch_uids { + let mut batch_tasks = RoaringBitmap::new(); + for batch_uid in batch_uids { + if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) { + batch_tasks |= &**processing_tasks; + } else { + batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?; + } + } + tasks &= batch_tasks; + } + + if let Some(status) = 
statuses { + let mut status_tasks = RoaringBitmap::new(); + for status in status { + match status { + // special case for Processing tasks + Status::Processing => { + status_tasks |= &**processing_tasks; + } + status => status_tasks |= &self.tasks.get_status(rtxn, *status)?, + }; + } + if !status.contains(&Status::Processing) { + tasks -= &**processing_tasks; + } + tasks &= status_tasks; + } + + if let Some(uids) = uids { + let uids = RoaringBitmap::from_iter(uids); + tasks &= &uids; + } + + if let Some(canceled_by) = canceled_by { + let mut all_canceled_tasks = RoaringBitmap::new(); + for cancel_task_uid in canceled_by { + if let Some(canceled_by_uid) = self.tasks.canceled_by.get(rtxn, cancel_task_uid)? { + all_canceled_tasks |= canceled_by_uid; + } + } + + // if the canceled_by has been specified but no task + // matches then we prefer matching zero than all tasks. + if all_canceled_tasks.is_empty() { + return Ok(RoaringBitmap::new()); + } else { + tasks &= all_canceled_tasks; + } + } + + if let Some(kind) = types { + let mut kind_tasks = RoaringBitmap::new(); + for kind in kind { + kind_tasks |= self.tasks.get_kind(rtxn, *kind)?; + } + tasks &= &kind_tasks; + } + + if let Some(index) = index_uids { + let mut index_tasks = RoaringBitmap::new(); + for index in index { + index_tasks |= self.tasks.index_tasks(rtxn, index)?; + } + tasks &= &index_tasks; + } + + // For the started_at filter, we need to treat the part of the tasks that are processing from the part of the + // tasks that are not processing. The non-processing ones are filtered normally while the processing ones + // are entirely removed unless the in-memory startedAt variable falls within the date filter. + // Once we have filtered the two subsets, we put them back together and assign it back to `tasks`. 
+ tasks = { + let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) = + (&tasks - &**processing_tasks, &tasks & &**processing_tasks); + + // special case for Processing tasks + // A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds + let mut clear_filtered_processing_tasks = + |start: Bound, end: Bound| { + let start = map_bound(start, |b| b.unix_timestamp_nanos()); + let end = map_bound(end, |b| b.unix_timestamp_nanos()); + let is_within_dates = RangeBounds::contains( + &(start, end), + &processing_batch + .as_ref() + .map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at) + .unix_timestamp_nanos(), + ); + if !is_within_dates { + filtered_processing_tasks.clear(); + } + }; + match (after_started_at, before_started_at) { + (None, None) => (), + (None, Some(before)) => { + clear_filtered_processing_tasks(Bound::Unbounded, Bound::Excluded(*before)) + } + (Some(after), None) => { + clear_filtered_processing_tasks(Bound::Excluded(*after), Bound::Unbounded) + } + (Some(after), Some(before)) => clear_filtered_processing_tasks( + Bound::Excluded(*after), + Bound::Excluded(*before), + ), + }; + + keep_ids_within_datetimes( + rtxn, + &mut filtered_non_processing_tasks, + self.tasks.started_at, + *after_started_at, + *before_started_at, + )?; + filtered_non_processing_tasks | filtered_processing_tasks + }; + + keep_ids_within_datetimes( + rtxn, + &mut tasks, + self.tasks.enqueued_at, + *after_enqueued_at, + *before_enqueued_at, + )?; + + keep_ids_within_datetimes( + rtxn, + &mut tasks, + self.tasks.finished_at, + *after_finished_at, + *before_finished_at, + )?; + + if let Some(limit) = limit { + tasks = if query.reverse.unwrap_or_default() { + tasks.into_iter().take(*limit as usize).collect() + } else { + tasks.into_iter().rev().take(*limit as usize).collect() + }; + } + + Ok(tasks) + } + + pub(crate) fn get_task_ids_from_authorized_indexes( + &self, + rtxn: &RoTxn, + query: &Query, + filters: 
&meilisearch_auth::AuthFilter, + processing_tasks: &ProcessingTasks, + ) -> Result<(RoaringBitmap, u64)> { + // compute all tasks matching the filter by ignoring the limits, to find the number of tasks matching + // the filter. + // As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares + // us from modifying the underlying implementation, and the performance remains sufficient. + // Should this change, we would modify `get_task_ids` to directly return the number of matching tasks. + let total_tasks = + self.get_task_ids(rtxn, &query.clone().without_limits(), processing_tasks)?; + let mut tasks = self.get_task_ids(rtxn, query, processing_tasks)?; + + // If the query contains a list of index uid or there is a finite list of authorized indexes, + // then we must exclude all the kinds that aren't associated to one and only one index. + if query.index_uids.is_some() || !filters.all_indexes_authorized() { + for kind in enum_iterator::all::().filter(|kind| !kind.related_to_one_index()) { + tasks -= self.tasks.get_kind(rtxn, kind)?; + } + } + + // Any task that is internally associated with a non-authorized index + // must be discarded. 
+ if !filters.all_indexes_authorized() { + let all_indexes_iter = self.tasks.index_tasks.iter(rtxn)?; + for result in all_indexes_iter { + let (index, index_tasks) = result?; + if !filters.is_index_authorized(index) { + tasks -= index_tasks; + } + } + } + + Ok((tasks, total_tasks.len())) + } + + pub(crate) fn get_tasks_from_authorized_indexes( + &self, + rtxn: &RoTxn, + query: &Query, + filters: &meilisearch_auth::AuthFilter, + processing_tasks: &ProcessingTasks, + ) -> Result<(Vec, u64)> { + let (tasks, total) = + self.get_task_ids_from_authorized_indexes(rtxn, query, filters, processing_tasks)?; + let tasks = if query.reverse.unwrap_or_default() { + Box::new(tasks.into_iter()) as Box> + } else { + Box::new(tasks.into_iter().rev()) as Box> + }; + let tasks = self + .tasks + .get_existing_tasks(rtxn, tasks.take(query.limit.unwrap_or(u32::MAX) as usize))?; + + let ProcessingTasks { batch, processing, progress: _ } = processing_tasks; + + let ret = tasks.into_iter(); + if processing.is_empty() || batch.is_none() { + Ok((ret.collect(), total)) + } else { + // Safe because we ensured there was a batch in the previous branch + let batch = batch.as_ref().unwrap(); + Ok(( + ret.map(|task| { + if processing.contains(task.uid) { + Task { + status: Status::Processing, + batch_uid: Some(batch.uid), + started_at: Some(batch.started_at), + ..task + } + } else { + task + } + }) + .collect(), + total, + )) + } + } +} diff --git a/crates/index-scheduler/src/queue/tasks_test.rs b/crates/index-scheduler/src/queue/tasks_test.rs new file mode 100644 index 000000000..d60d621d1 --- /dev/null +++ b/crates/index-scheduler/src/queue/tasks_test.rs @@ -0,0 +1,441 @@ +use meili_snap::snapshot; +use meilisearch_auth::AuthFilter; +use meilisearch_types::index_uid_pattern::IndexUidPattern; +use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status}; +use time::{Duration, OffsetDateTime}; + +use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler}; +use 
crate::test_utils::Breakpoint::*; +use crate::test_utils::{index_creation_task, FailureLocation}; +use crate::{IndexScheduler, Query}; + +#[test] +fn query_tasks_from_and_limit() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let kind = index_creation_task("doggo", "bone"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + let kind = index_creation_task("whalo", "plankton"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + let kind = index_creation_task("catto", "his_own_vomit"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); + + handle.advance_n_successful_batches(3); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks"); + + let rtxn = index_scheduler.env.read_txn().unwrap(); + let processing = index_scheduler.processing_tasks.read().unwrap(); + let query = Query { limit: Some(0), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { limit: Some(1), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + let query = Query { limit: Some(2), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); + + let query = Query { from: Some(1), ..Default::default() }; 
+ let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); + + let query = Query { from: Some(2), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); + + let query = Query { from: Some(1), limit: Some(1), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[1,]"); + + let query = Query { from: Some(1), limit: Some(2), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); +} + +#[test] +fn query_tasks_simple() { + let start_time = OffsetDateTime::now_utc(); + + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("whalo", "fish"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); + + handle.advance_till([Start, BatchCreated]); + + let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[0,]"); // only the processing tasks in the first 
tick + + let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); // only the enqueued tasks in the first tick + + let query = + Query { statuses: Some(vec![Status::Enqueued, Status::Processing]), ..Default::default() }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); // both enqueued and processing tasks in the first tick + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Processing]), + after_started_at: Some(start_time), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both enqueued and processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the test, which should exclude the enqueued tasks + snapshot!(snapshot_bitmap(&tasks), @"[0,]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Processing]), + before_started_at: Some(start_time), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both enqueued and processing tasks in the first tick, but limited to those with a started_at + // that comes before the start of the test, which should exclude all of them + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Processing]), + after_started_at: Some(start_time), + before_started_at: Some(start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both enqueued and 
processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the test and before one minute after the start of the test, + // which should exclude the enqueued tasks and include the only processing task + snapshot!(snapshot_bitmap(&tasks), @"[0,]"); + + handle.advance_till([ + InsideProcessBatch, + InsideProcessBatch, + ProcessBatchSucceeded, + AfterProcessing, + Start, + BatchCreated, + ]); + + let second_start_time = OffsetDateTime::now_utc(); + + let query = Query { + statuses: Some(vec![Status::Succeeded, Status::Processing]), + after_started_at: Some(start_time), + before_started_at: Some(start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both succeeded and processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the test and before one minute after the start of the test, + // which should include all tasks + snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); + + let query = Query { + statuses: Some(vec![Status::Succeeded, Status::Processing]), + before_started_at: Some(start_time), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both succeeded and processing tasks in the first tick, but limited to those with a started_at + // that comes before the start of the test, which should exclude all tasks + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both succeeded and 
processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the second part of the test and before one minute after the + // second start of the test, which should exclude all tasks + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + // now we make one more batch, the started_at field of the new tasks will be past `second_start_time` + handle.advance_till([ + InsideProcessBatch, + InsideProcessBatch, + ProcessBatchSucceeded, + AfterProcessing, + Start, + BatchCreated, + ]); + + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // we run the same query to verify that, and indeed find that the last task is matched + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // enqueued, succeeded, or processing tasks started after the second part of the test, should + // again only return the last task + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + handle.advance_till([ProcessBatchFailed, AfterProcessing]); + + // now the last task should have failed + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end"); + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // so running the last query should return nothing + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + 
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // but the same query on failed tasks should return the last task + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // but the same query on failed tasks should return the last task + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + uids: Some(vec![1]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // same query but with an invalid uid + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + uids: Some(vec![2]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // same query but with a valid uid + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); +} + +#[test] +fn query_tasks_special_rules() { + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: 
("catto".to_owned(), "doggo".to_owned()) }], + }; + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }], + }; + let _task = index_scheduler.register(kind, None, false).unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); + + handle.advance_till([Start, BatchCreated]); + + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + + let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + // only the first task associated with catto is returned, the indexSwap tasks are excluded! + snapshot!(snapshot_bitmap(&tasks), @"[0,]"); + + let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes( + &rtxn, + &query, + &AuthFilter::with_allowed_indexes( + vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), + ), + &proc, + ) + .unwrap(); + // we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks + // associated with doggo -> empty result + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query::default(); + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes( + &rtxn, + &query, + &AuthFilter::with_allowed_indexes( + vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), + ), + &proc, + ) + .unwrap(); + // we asked for all the tasks, but we are only authorized to retrieve the doggo tasks + // -> only the index creation of doggo should be returned + snapshot!(snapshot_bitmap(&tasks), @"[1,]"); + + let query = Query::default(); + let (tasks, _) = 
index_scheduler + .queue + .get_task_ids_from_authorized_indexes( + &rtxn, + &query, + &AuthFilter::with_allowed_indexes( + vec![ + IndexUidPattern::new_unchecked("catto"), + IndexUidPattern::new_unchecked("doggo"), + ] + .into_iter() + .collect(), + ), + &proc, + ) + .unwrap(); + // we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks + // -> all tasks except the swap of catto with whalo are returned + snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); + + let query = Query::default(); + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + // we asked for all the tasks with all index authorized -> all tasks returned + snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,3,]"); +} + +#[test] +fn query_tasks_canceled_by() { + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + let _ = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _ = index_scheduler.register(kind, None, false).unwrap(); + let kind = KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], + }; + let _task = index_scheduler.register(kind, None, false).unwrap(); + + handle.advance_n_successful_batches(1); + let kind = KindWithContent::TaskCancelation { + query: "test_query".to_string(), + tasks: [0, 1, 2, 3].into_iter().collect(), + }; + let task_cancelation = index_scheduler.register(kind, None, false).unwrap(); + handle.advance_n_successful_batches(1); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); + + let rtxn = index_scheduler.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; + let (tasks, _) = 
index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + // 0 is not returned because it was not canceled, 3 is not returned because it is the uid of the + // taskCancelation itself + snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); + + let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes( + &rtxn, + &query, + &AuthFilter::with_allowed_indexes( + vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), + ), + &proc, + ) + .unwrap(); + // Return only 1 because the user is not authorized to see task 2 + snapshot!(snapshot_bitmap(&tasks), @"[1,]"); +} diff --git a/crates/index-scheduler/src/queue/test.rs b/crates/index-scheduler/src/queue/test.rs new file mode 100644 index 000000000..5a886b088 --- /dev/null +++ b/crates/index-scheduler/src/queue/test.rs @@ -0,0 +1,395 @@ +use big_s::S; +use meili_snap::{json_string, snapshot}; +use meilisearch_types::error::ErrorCode; +use meilisearch_types::tasks::{KindWithContent, Status}; +use roaring::RoaringBitmap; + +use crate::insta_snapshot::snapshot_index_scheduler; +use crate::test_utils::Breakpoint::*; +use crate::test_utils::{index_creation_task, replace_document_import_task}; +use crate::{IndexScheduler, Query}; + +#[test] +fn register() { + // In this test, the handle doesn't make any progress, we only check that the tasks are registered + let (index_scheduler, mut _handle) = IndexScheduler::test(true, vec![]); + + let kinds = [ + index_creation_task("catto", "mouse"), + replace_document_import_task("catto", None, 0, 12), + replace_document_import_task("catto", None, 1, 50), + replace_document_import_task("doggo", Some("bone"), 2, 5000), + ]; + let (_, file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + file.persist().unwrap(); + let (_, file) = 
index_scheduler.queue.create_update_file_with_uuid(1).unwrap(); + file.persist().unwrap(); + let (_, file) = index_scheduler.queue.create_update_file_with_uuid(2).unwrap(); + file.persist().unwrap(); + + for (idx, kind) in kinds.into_iter().enumerate() { + let k = kind.as_kind(); + let task = index_scheduler.register(kind, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + + assert_eq!(task.uid, idx as u32); + assert_eq!(task.status, Status::Enqueued); + assert_eq!(task.kind.as_kind(), k); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "everything_is_successfully_registered"); +} + +#[test] +fn dry_run() { + let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, None, true).unwrap(); + snapshot!(task.uid, @"0"); + snapshot!(snapshot_index_scheduler(&index_scheduler), @r" + ### Autobatching Enabled = true + ### Processing batch None: + [] + ---------------------------------------------------------------------- + ### All Tasks: + ---------------------------------------------------------------------- + ### Status: + ---------------------------------------------------------------------- + ### Kind: + ---------------------------------------------------------------------- + ### Index Tasks: + ---------------------------------------------------------------------- + ### Index Mapper: + + ---------------------------------------------------------------------- + ### Canceled By: + + ---------------------------------------------------------------------- + ### Enqueued At: + ---------------------------------------------------------------------- + ### Started At: + ---------------------------------------------------------------------- + ### Finished At: + ---------------------------------------------------------------------- + ### All Batches: + 
---------------------------------------------------------------------- + ### Batch to tasks mapping: + ---------------------------------------------------------------------- + ### Batches Status: + ---------------------------------------------------------------------- + ### Batches Kind: + ---------------------------------------------------------------------- + ### Batches Index Tasks: + ---------------------------------------------------------------------- + ### Batches Enqueued At: + ---------------------------------------------------------------------- + ### Batches Started At: + ---------------------------------------------------------------------- + ### Batches Finished At: + ---------------------------------------------------------------------- + ### File Store: + + ---------------------------------------------------------------------- + "); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, Some(12), true).unwrap(); + snapshot!(task.uid, @"12"); + snapshot!(snapshot_index_scheduler(&index_scheduler), @r" + ### Autobatching Enabled = true + ### Processing batch None: + [] + ---------------------------------------------------------------------- + ### All Tasks: + ---------------------------------------------------------------------- + ### Status: + ---------------------------------------------------------------------- + ### Kind: + ---------------------------------------------------------------------- + ### Index Tasks: + ---------------------------------------------------------------------- + ### Index Mapper: + + ---------------------------------------------------------------------- + ### Canceled By: + + ---------------------------------------------------------------------- + ### Enqueued At: + ---------------------------------------------------------------------- + ### Started At: + ---------------------------------------------------------------------- + ### Finished At: + 
---------------------------------------------------------------------- + ### All Batches: + ---------------------------------------------------------------------- + ### Batch to tasks mapping: + ---------------------------------------------------------------------- + ### Batches Status: + ---------------------------------------------------------------------- + ### Batches Kind: + ---------------------------------------------------------------------- + ### Batches Index Tasks: + ---------------------------------------------------------------------- + ### Batches Enqueued At: + ---------------------------------------------------------------------- + ### Batches Started At: + ---------------------------------------------------------------------- + ### Batches Finished At: + ---------------------------------------------------------------------- + ### File Store: + + ---------------------------------------------------------------------- + "); +} + +#[test] +fn basic_set_taskid() { + let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(task.uid, @"0"); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, Some(12), false).unwrap(); + snapshot!(task.uid, @"12"); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let error = index_scheduler.register(kind, Some(5), false).unwrap_err(); + snapshot!(error, @"Received bad task id: 5 should be >= to 13."); +} + +#[test] +fn test_disable_auto_deletion_of_tasks() { + let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| { + config.cleanup_enabled = false; + config.max_number_of_tasks = 2; + }); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), 
primary_key: None }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + handle.advance_one_failed_batch(); + + // at this point the max number of tasks is reached + // we can still enqueue multiple tasks + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = + index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); + drop(rtxn); + drop(proc); + + // now we're above the max number of tasks + // and if we try to advance in the tick function no new task deletion should be enqueued + handle.advance_till([Start, BatchCreated]); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = + index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued"); + drop(rtxn); + drop(proc); +} + +#[test] +fn test_auto_deletion_of_tasks() { + let (index_scheduler, mut handle) = 
IndexScheduler::test_with_custom_config(vec![], |config| { + config.max_number_of_tasks = 2; + }); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + handle.advance_one_failed_batch(); + + // at this point the max number of tasks is reached + // we can still enqueue multiple tasks + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = + index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); + drop(rtxn); + drop(proc); + + // now we're above the max number of tasks + // and if we try to advance in the tick function a new task deletion should be enqueued + handle.advance_till([Start, BatchCreated]); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = + index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", 
".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued"); + drop(rtxn); + drop(proc); + + handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = + index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed"); + drop(rtxn); + drop(proc); + + handle.advance_one_failed_batch(); + // a new task deletion has been enqueued + handle.advance_one_successful_batch(); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = + index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion"); + drop(rtxn); + drop(proc); + + handle.advance_one_failed_batch(); + handle.advance_one_successful_batch(); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = + index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", 
".**.query" => "[query]" }), name: "everything_has_been_processed"); + drop(rtxn); + drop(proc); +} + +#[test] +fn test_task_queue_is_full() { + let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| { + // that's the minimum map size possible + config.task_db_size = 1048576; + }); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + // on average this task takes ~600 bytes + loop { + let result = index_scheduler.register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ); + if result.is_err() { + break; + } + handle.advance_one_failed_batch(); + } + index_scheduler.assert_internally_consistent(); + + // at this point the task DB shoud have reached its limit and we should not be able to register new tasks + let result = index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap_err(); + snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations."); + // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code + snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice"); + + // Even the task deletion that doesn't delete anything shouldn't be accepted + let result = index_scheduler + .register( + KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() }, + None, + false, + ) + .unwrap_err(); + snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. 
Please delete tasks to continue performing write operations."); + // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error returns the expected error code + snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice"); + + // But a task deletion that deletes something should work + index_scheduler + .register( + KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + // Now we should be able to enqueue a few tasks again + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + handle.advance_one_failed_batch(); +} diff --git a/crates/index-scheduler/src/scheduler/autobatcher.rs b/crates/index-scheduler/src/scheduler/autobatcher.rs new file mode 100644 index 000000000..3363b2c8f --- /dev/null +++ b/crates/index-scheduler/src/scheduler/autobatcher.rs @@ -0,0 +1,514 @@ +/*! +The autobatcher is responsible for combining the next enqueued +tasks affecting a single index into a [batch](crate::batch::Batch). + +The main function of the autobatcher is [`next_autobatch`]. +*/ + +use std::ops::ControlFlow::{self, Break, Continue}; + +use meilisearch_types::milli::update::IndexDocumentsMethod::{ + self, ReplaceDocuments, UpdateDocuments, +}; +use meilisearch_types::tasks::TaskId; + +use crate::KindWithContent; + +/// Succinctly describes a task's [`Kind`](meilisearch_types::tasks::Kind) +/// for the purpose of simplifying the implementation of the autobatcher. 
+/// +/// Only the non-prioritised tasks that can be grouped in a batch have a corresponding [`AutobatchKind`] +enum AutobatchKind { + DocumentImport { + method: IndexDocumentsMethod, + allow_index_creation: bool, + primary_key: Option<String>, + }, + DocumentEdition, + DocumentDeletion { + by_filter: bool, + }, + DocumentClear, + Settings { + allow_index_creation: bool, + }, + IndexCreation, + IndexDeletion, + IndexUpdate, + IndexSwap, +} + +impl AutobatchKind { + #[rustfmt::skip] + fn allow_index_creation(&self) -> Option<bool> { + match self { + AutobatchKind::DocumentImport { allow_index_creation, .. } + | AutobatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation), + _ => None, + } + } + + fn primary_key(&self) -> Option<Option<&str>> { + match self { + AutobatchKind::DocumentImport { primary_key, .. } => Some(primary_key.as_deref()), + _ => None, + } + } +} + +impl From<KindWithContent> for AutobatchKind { + fn from(kind: KindWithContent) -> Self { + match kind { + KindWithContent::DocumentAdditionOrUpdate { + method, + allow_index_creation, + primary_key, + .. + } => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key }, + KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition, + KindWithContent::DocumentDeletion { .. } => { + AutobatchKind::DocumentDeletion { by_filter: false } + } + KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear, + KindWithContent::DocumentDeletionByFilter { .. } => { + AutobatchKind::DocumentDeletion { by_filter: true } + } + KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => { + AutobatchKind::Settings { + allow_index_creation: allow_index_creation && !is_deletion, + } + } + KindWithContent::IndexDeletion { .. } => AutobatchKind::IndexDeletion, + KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation, + KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate, + KindWithContent::IndexSwap { .. 
} => AutobatchKind::IndexSwap, + KindWithContent::TaskCancelation { .. } + | KindWithContent::TaskDeletion { .. } + | KindWithContent::DumpCreation { .. } + | KindWithContent::SnapshotCreation => { + panic!("The autobatcher should never be called with tasks that don't apply to an index.") + } + } + } +} + +#[derive(Debug)] +pub enum BatchKind { + DocumentClear { + ids: Vec, + }, + DocumentOperation { + method: IndexDocumentsMethod, + allow_index_creation: bool, + primary_key: Option, + operation_ids: Vec, + }, + DocumentEdition { + id: TaskId, + }, + DocumentDeletion { + deletion_ids: Vec, + includes_by_filter: bool, + }, + ClearAndSettings { + other: Vec, + allow_index_creation: bool, + settings_ids: Vec, + }, + Settings { + allow_index_creation: bool, + settings_ids: Vec, + }, + IndexDeletion { + ids: Vec, + }, + IndexCreation { + id: TaskId, + }, + IndexUpdate { + id: TaskId, + }, + IndexSwap { + id: TaskId, + }, +} + +impl BatchKind { + #[rustfmt::skip] + fn allow_index_creation(&self) -> Option { + match self { + BatchKind::DocumentOperation { allow_index_creation, .. } + | BatchKind::ClearAndSettings { allow_index_creation, .. } + | BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation), + _ => None, + } + } + + fn primary_key(&self) -> Option> { + match self { + BatchKind::DocumentOperation { primary_key, .. } => Some(primary_key.as_deref()), + _ => None, + } + } +} + +impl BatchKind { + /// Returns a `ControlFlow::Break` if you must stop right now. + /// The boolean tell you if an index has been created by the batched task. + /// To ease the writing of the code. `true` can be returned when you don't need to create an index + /// but false can't be returned if you needs to create an index. 
+ // TODO use an AutoBatchKind as input + pub fn new( + task_id: TaskId, + kind: KindWithContent, + primary_key: Option<&str>, + ) -> (ControlFlow, bool) { + use AutobatchKind as K; + + match AutobatchKind::from(kind) { + K::IndexCreation => (Break(BatchKind::IndexCreation { id: task_id }), true), + K::IndexDeletion => (Break(BatchKind::IndexDeletion { ids: vec![task_id] }), false), + K::IndexUpdate => (Break(BatchKind::IndexUpdate { id: task_id }), false), + K::IndexSwap => (Break(BatchKind::IndexSwap { id: task_id }), false), + K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false), + K::DocumentImport { method, allow_index_creation, primary_key: pk } + if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() => + { + ( + Continue(BatchKind::DocumentOperation { + method, + allow_index_creation, + primary_key: pk, + operation_ids: vec![task_id], + }), + allow_index_creation, + ) + } + // if the primary key set in the task was different than ours we should stop and make this batch fail asap. + K::DocumentImport { method, allow_index_creation, primary_key } => ( + Break(BatchKind::DocumentOperation { + method, + allow_index_creation, + primary_key, + operation_ids: vec![task_id], + }), + allow_index_creation, + ), + K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false), + K::DocumentDeletion { by_filter: includes_by_filter } => ( + Continue(BatchKind::DocumentDeletion { + deletion_ids: vec![task_id], + includes_by_filter, + }), + false, + ), + K::Settings { allow_index_creation } => ( + Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }), + allow_index_creation, + ), + } + } + + /// Returns a `ControlFlow::Break` if you must stop right now. + /// The boolean tell you if an index has been created by the batched task. + /// To ease the writing of the code. 
`true` can be returned when you don't need to create an index + /// but false can't be returned if you needs to create an index. + #[rustfmt::skip] + fn accumulate(self, id: TaskId, kind: AutobatchKind, index_already_exists: bool, primary_key: Option<&str>) -> ControlFlow { + use AutobatchKind as K; + + match (self, kind) { + // We don't batch any of these operations + (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break(this), + // We must not batch tasks that don't have the same index creation rights if the index doesn't already exists. + (this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => { + Break(this) + }, + // NOTE: We need to negate the whole condition since we're checking if we need to break instead of continue. + // I wrote it this way because it's easier to understand than the other way around. + (this, kind) if !( + // 1. If both task don't interact with primary key -> we can continue + (this.primary_key().is_none() && kind.primary_key().is_none()) || + // 2. 
Else -> + ( + // 2.1 If we already have a primary-key -> + ( + primary_key.is_some() && + // 2.1.1 If the task we're trying to accumulate have a pk it must be equal to our primary key + // 2.1.2 If the task don't have a primary-key -> we can continue + kind.primary_key().map_or(true, |pk| pk == primary_key) + ) || + // 2.2 If we don't have a primary-key -> + ( + // 2.2.1 If both the batch and the task have a primary key they should be equal + // 2.2.2 If the batch is set to Some(None), the task should be too + // 2.2.3 If the batch is set to None -> we can continue + this.primary_key().zip(kind.primary_key()).map_or(true, |(this, kind)| this == kind) + ) + ) + + ) // closing the negation + + => { + Break(this) + }, + // The index deletion can batch with everything but must stop after + ( + BatchKind::DocumentClear { mut ids } + | BatchKind::DocumentDeletion { deletion_ids: mut ids, includes_by_filter: _ } + | BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, operation_ids: mut ids } + | BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids }, + K::IndexDeletion, + ) => { + ids.push(id); + Break(BatchKind::IndexDeletion { ids }) + } + ( + BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other }, + K::IndexDeletion, + ) => { + ids.push(id); + ids.append(&mut other); + Break(BatchKind::IndexDeletion { ids }) + } + + ( + BatchKind::DocumentClear { mut ids }, + K::DocumentClear | K::DocumentDeletion { by_filter: _ }, + ) => { + ids.push(id); + Continue(BatchKind::DocumentClear { ids }) + } + ( + this @ BatchKind::DocumentClear { .. }, + K::DocumentImport { .. } | K::Settings { .. 
}, + ) => Break(this), + ( + BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, mut operation_ids }, + K::DocumentClear, + ) => { + operation_ids.push(id); + Continue(BatchKind::DocumentClear { ids: operation_ids }) + } + + // we can autobatch the same kind of document additions / updates + ( + BatchKind::DocumentOperation { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut operation_ids }, + K::DocumentImport { method: ReplaceDocuments, primary_key: pk, .. }, + ) => { + operation_ids.push(id); + Continue(BatchKind::DocumentOperation { + method: ReplaceDocuments, + allow_index_creation, + operation_ids, + primary_key: pk, + }) + } + ( + BatchKind::DocumentOperation { method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids }, + K::DocumentImport { method: UpdateDocuments, primary_key: pk, .. }, + ) => { + operation_ids.push(id); + Continue(BatchKind::DocumentOperation { + method: UpdateDocuments, + allow_index_creation, + primary_key: pk, + operation_ids, + }) + } + ( + BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids }, + K::DocumentDeletion { by_filter: false }, + ) => { + operation_ids.push(id); + + Continue(BatchKind::DocumentOperation { + method, + allow_index_creation, + primary_key, + operation_ids, + }) + } + // We can't batch a document operation with a delete by filter + ( + this @ BatchKind::DocumentOperation { .. }, + K::DocumentDeletion { by_filter: true }, + ) => { + Break(this) + } + // but we can't autobatch documents if it's not the same kind + // this match branch MUST be AFTER the previous one + ( + this @ BatchKind::DocumentOperation { .. }, + K::DocumentImport { .. }, + ) => Break(this), + + ( + this @ BatchKind::DocumentOperation { .. }, + K::Settings { .. 
}, + ) => Break(this), + + (BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => { + deletion_ids.push(id); + Continue(BatchKind::DocumentClear { ids: deletion_ids }) + } + // we can't autobatch the deletion and import if the document deletion contained a filter + ( + this @ BatchKind::DocumentDeletion { deletion_ids: _, includes_by_filter: true }, + K::DocumentImport { .. } + ) => Break(this), + // we can autobatch the deletion and import if the index already exists + ( + BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false }, + K::DocumentImport { method, allow_index_creation, primary_key } + ) if index_already_exists => { + deletion_ids.push(id); + + Continue(BatchKind::DocumentOperation { + method, + allow_index_creation, + primary_key, + operation_ids: deletion_ids, + }) + } + // we can autobatch the deletion and import if both can't create an index + ( + BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false }, + K::DocumentImport { method, allow_index_creation, primary_key } + ) if !allow_index_creation => { + deletion_ids.push(id); + + Continue(BatchKind::DocumentOperation { + method, + allow_index_creation, + primary_key, + operation_ids: deletion_ids, + }) + } + // we can't autobatch a deletion and an import if the index does not exists but would be created by an addition + ( + this @ BatchKind::DocumentDeletion { .. }, + K::DocumentImport { .. } + ) => { + Break(this) + } + (BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter }, K::DocumentDeletion { by_filter }) => { + deletion_ids.push(id); + Continue(BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: includes_by_filter | by_filter }) + } + (this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. 
}) => Break(this), + + ( + BatchKind::Settings { settings_ids, allow_index_creation }, + K::DocumentClear, + ) => Continue(BatchKind::ClearAndSettings { + settings_ids, + allow_index_creation, + other: vec![id], + }), + ( + this @ BatchKind::Settings { .. }, + K::DocumentImport { .. } | K::DocumentDeletion { .. }, + ) => Break(this), + ( + BatchKind::Settings { mut settings_ids, allow_index_creation }, + K::Settings { .. }, + ) => { + settings_ids.push(id); + Continue(BatchKind::Settings { + allow_index_creation, + settings_ids, + }) + } + + ( + BatchKind::ClearAndSettings { mut other, settings_ids, allow_index_creation }, + K::DocumentClear, + ) => { + other.push(id); + Continue(BatchKind::ClearAndSettings { + other, + settings_ids, + allow_index_creation, + }) + } + (this @ BatchKind::ClearAndSettings { .. }, K::DocumentImport { .. }) => Break(this), + ( + BatchKind::ClearAndSettings { + mut other, + settings_ids, + allow_index_creation, + }, + K::DocumentDeletion { .. }, + ) => { + other.push(id); + Continue(BatchKind::ClearAndSettings { + other, + settings_ids, + allow_index_creation, + }) + } + ( + BatchKind::ClearAndSettings { mut settings_ids, other, allow_index_creation }, + K::Settings { .. }, + ) => { + settings_ids.push(id); + Continue(BatchKind::ClearAndSettings { + other, + settings_ids, + allow_index_creation, + }) + } + + ( + BatchKind::IndexCreation { .. } + | BatchKind::IndexDeletion { .. } + | BatchKind::IndexUpdate { .. } + | BatchKind::IndexSwap { .. } + | BatchKind::DocumentEdition { .. }, + _, + ) => { + unreachable!() + } + } + } +} + +/// Create a batch from an ordered list of tasks. +/// +/// ## Preconditions +/// 1. The tasks must be enqueued and given in the order in which they were enqueued +/// 2. The tasks must not be prioritised tasks (e.g. task cancellation, dump, snapshot, task deletion) +/// 3. The tasks must all be related to the same index +/// +/// ## Return +/// `None` if the list of tasks is empty. 
Otherwise, an [`AutoBatch`] that represents +/// a subset of the given tasks. +pub fn autobatch( + enqueued: Vec<(TaskId, KindWithContent)>, + index_already_exists: bool, + primary_key: Option<&str>, +) -> Option<(BatchKind, bool)> { + let mut enqueued = enqueued.into_iter(); + let (id, kind) = enqueued.next()?; + + // index_exist will keep track of if the index should exist at this point after the tasks we batched. + let mut index_exist = index_already_exists; + + let (mut acc, must_create_index) = match BatchKind::new(id, kind, primary_key) { + (Continue(acc), create) => (acc, create), + (Break(acc), create) => return Some((acc, create)), + }; + + // if an index has been created in the previous step we can consider it as existing. + index_exist |= must_create_index; + + for (id, kind) in enqueued { + acc = match acc.accumulate(id, kind.into(), index_exist, primary_key) { + Continue(acc) => acc, + Break(acc) => return Some((acc, must_create_index)), + }; + } + + Some((acc, must_create_index)) +} diff --git a/crates/index-scheduler/src/scheduler/autobatcher_test.rs b/crates/index-scheduler/src/scheduler/autobatcher_test.rs new file mode 100644 index 000000000..1e18b276d --- /dev/null +++ b/crates/index-scheduler/src/scheduler/autobatcher_test.rs @@ -0,0 +1,395 @@ +use meilisearch_types::milli::update::IndexDocumentsMethod::{ + self, ReplaceDocuments, UpdateDocuments, +}; +use meilisearch_types::tasks::{IndexSwap, KindWithContent}; +use uuid::Uuid; + +use self::autobatcher::{autobatch, BatchKind}; +use super::*; +use crate::TaskId; + +#[macro_export] +macro_rules! 
debug_snapshot { + ($value:expr, @$snapshot:literal) => {{ + let value = format!("{:?}", $value); + meili_snap::snapshot!(value, @$snapshot); + }}; + } + +fn autobatch_from( + index_already_exists: bool, + primary_key: Option<&str>, + input: impl IntoIterator, +) -> Option<(BatchKind, bool)> { + autobatch( + input.into_iter().enumerate().map(|(id, kind)| (id as TaskId, kind)).collect(), + index_already_exists, + primary_key, + ) +} + +fn doc_imp( + method: IndexDocumentsMethod, + allow_index_creation: bool, + primary_key: Option<&str>, +) -> KindWithContent { + KindWithContent::DocumentAdditionOrUpdate { + index_uid: String::from("doggo"), + primary_key: primary_key.map(|pk| pk.to_string()), + method, + content_file: Uuid::new_v4(), + documents_count: 0, + allow_index_creation, + } +} + +fn doc_del() -> KindWithContent { + KindWithContent::DocumentDeletion { + index_uid: String::from("doggo"), + documents_ids: Vec::new(), + } +} + +fn doc_del_fil() -> KindWithContent { + KindWithContent::DocumentDeletionByFilter { + index_uid: String::from("doggo"), + filter_expr: serde_json::json!("cuteness > 100"), + } +} + +fn doc_clr() -> KindWithContent { + KindWithContent::DocumentClear { index_uid: String::from("doggo") } +} + +fn settings(allow_index_creation: bool) -> KindWithContent { + KindWithContent::SettingsUpdate { + index_uid: String::from("doggo"), + new_settings: Default::default(), + is_deletion: false, + allow_index_creation, + } +} + +fn idx_create() -> KindWithContent { + KindWithContent::IndexCreation { index_uid: String::from("doggo"), primary_key: None } +} + +fn idx_update() -> KindWithContent { + KindWithContent::IndexUpdate { index_uid: String::from("doggo"), primary_key: None } +} + +fn idx_del() -> KindWithContent { + KindWithContent::IndexDeletion { index_uid: String::from("doggo") } +} + +fn idx_swap() -> KindWithContent { + KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: (String::from("doggo"), String::from("catto")) }], + } +} + 
+#[test] +fn autobatch_simple_operation_together() { + // we can autobatch one or multiple `ReplaceDocuments` together. + // if the index exists. + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))"); + + // if it doesn't exists. 
+ debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + + // we can autobatch one or multiple `UpdateDocuments` together. + // if the index exists. 
+ debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))"); + + // if it doesn't exists. 
+ debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))"); + + // we can autobatch one or multiple DocumentDeletion together + debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))"); + + // we can autobatch one or multiple DocumentDeletionByFilter together + debug_snapshot!(autobatch_from(true, None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + 
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))"); + + // we can autobatch one or multiple Settings together + debug_snapshot!(autobatch_from(true, None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))"); + + debug_snapshot!(autobatch_from(false,None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false,None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))"); + + // We can autobatch document addition with document deletion + debug_snapshot!(autobatch_from(true, None, 
[doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(false, None, 
[doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + // And the other way around + 
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + 
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + + // But we can't autobatch document addition with document deletion by filter + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, 
operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, 
operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); + // And the other way around + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { 
deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); +} + +#[test] +fn simple_document_operation_dont_autobatch_with_other() { + // addition, updates and deletion by filter can't batch together + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, 
allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, 
[doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); +} + +#[test] +fn document_addition_doesnt_batch_with_settings() { + // simple case + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + + // multiple settings and doc addition + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, 
true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + + // addition and setting unordered + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + + // Doesn't batch with other forbidden operations + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, 
[doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); +} + +#[test] +fn clear_and_additions() { + // these two doesn't need to batch + debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(UpdateDocuments, 
true, None)]), @"Some((DocumentClear { ids: [0] }, false))"); + + // Basic use case + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); + + // This batch kind doesn't mix with other document addition + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); + + // But you can batch multiple clear together + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))"); +} + +#[test] +fn clear_and_additions_and_settings() { + // A clear don't need to autobatch the settings that happens AFTER there is no documents + debug_snapshot!(autobatch_from(true, None, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))"); + + debug_snapshot!(autobatch_from(true, None, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, 
None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); +} + +#[test] +fn anything_and_index_deletion() { + // The `IndexDeletion` doesn't batch with anything that happens AFTER. + debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))"); + + debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, 
false))"); + debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))"); + + // The index deletion can accept almost any type of `BatchKind` and transform it to an `IndexDeletion`. 
+ // First, the basic cases + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false,None, 
[doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); +} + +#[test] +fn allowed_and_disallowed_index_creation() { + // `DocumentImport` can't be mixed with those disallowed to do so except if the index already exists. + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, 
true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + + // batch deletion and addition + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); +} + +#[test] +fn autobatch_primary_key() { + // ==> If 
I have a pk + // With a single update + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); + + // With a multiple updates + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, Some("id"), 
[doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), 
@r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); + + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: 
Some("other"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); + + // ==> If I don't have a pk + // With a single update + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); + + // With a multiple updates + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); +} diff 
--git a/crates/index-scheduler/src/scheduler/create_batch.rs b/crates/index-scheduler/src/scheduler/create_batch.rs
new file mode 100644
index 000000000..b224ee6eb
--- /dev/null
+++ b/crates/index-scheduler/src/scheduler/create_batch.rs
@@ -0,0 +1,539 @@
+use std::fmt;
+
+use meilisearch_types::heed::RoTxn;
+use meilisearch_types::milli::update::IndexDocumentsMethod;
+use meilisearch_types::settings::{Settings, Unchecked};
+use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
+use roaring::RoaringBitmap;
+use uuid::Uuid;
+
+use super::autobatcher::{self, BatchKind};
+use crate::utils::ProcessingBatch;
+use crate::{Error, IndexScheduler, Result};
+
+/// Represents a combination of tasks that can all be processed at the same time.
+///
+/// A batch contains the set of tasks that it represents (accessible through
+/// [`self.ids()`](Batch::ids)), as well as additional information on how to
+/// be processed.
+#[derive(Debug)]
+pub(crate) enum Batch {
+    TaskCancelation {
+        /// The task cancelation itself.
+        task: Task,
+    },
+    TaskDeletions(Vec<Task>),
+    SnapshotCreation(Vec<Task>),
+    Dump(Task),
+    IndexOperation {
+        op: IndexOperation,
+        must_create_index: bool,
+    },
+    IndexCreation {
+        index_uid: String,
+        primary_key: Option<String>,
+        task: Task,
+    },
+    IndexUpdate {
+        index_uid: String,
+        primary_key: Option<String>,
+        task: Task,
+    },
+    IndexDeletion {
+        index_uid: String,
+        tasks: Vec<Task>,
+        index_has_been_created: bool,
+    },
+    IndexSwap {
+        task: Task,
+    },
+}
+
+#[derive(Debug)]
+pub(crate) enum DocumentOperation {
+    Add(Uuid),
+    Delete(Vec<String>),
+}
+
+/// A [batch](Batch) that combines multiple tasks operating on an index.
+#[derive(Debug)] +pub(crate) enum IndexOperation { + DocumentOperation { + index_uid: String, + primary_key: Option, + method: IndexDocumentsMethod, + operations: Vec, + tasks: Vec, + }, + DocumentEdition { + index_uid: String, + task: Task, + }, + DocumentDeletion { + index_uid: String, + tasks: Vec, + }, + DocumentClear { + index_uid: String, + tasks: Vec, + }, + Settings { + index_uid: String, + // The boolean indicates if it's a settings deletion or creation. + settings: Vec<(bool, Settings)>, + tasks: Vec, + }, + DocumentClearAndSetting { + index_uid: String, + cleared_tasks: Vec, + + // The boolean indicates if it's a settings deletion or creation. + settings: Vec<(bool, Settings)>, + settings_tasks: Vec, + }, +} + +impl Batch { + /// Return the task ids associated with this batch. + pub fn ids(&self) -> RoaringBitmap { + match self { + Batch::TaskCancelation { task, .. } + | Batch::Dump(task) + | Batch::IndexCreation { task, .. } + | Batch::IndexUpdate { task, .. } => { + RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() + } + Batch::SnapshotCreation(tasks) + | Batch::TaskDeletions(tasks) + | Batch::IndexDeletion { tasks, .. } => { + RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid)) + } + Batch::IndexOperation { op, .. } => match op { + IndexOperation::DocumentOperation { tasks, .. } + | IndexOperation::Settings { tasks, .. } + | IndexOperation::DocumentDeletion { tasks, .. } + | IndexOperation::DocumentClear { tasks, .. } => { + RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid)) + } + IndexOperation::DocumentEdition { task, .. } => { + RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() + } + IndexOperation::DocumentClearAndSetting { + cleared_tasks: tasks, + settings_tasks: other, + .. 
+ } => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)), + }, + Batch::IndexSwap { task } => { + RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() + } + } + } + + /// Return the index UID associated with this batch + pub fn index_uid(&self) -> Option<&str> { + use Batch::*; + match self { + TaskCancelation { .. } + | TaskDeletions(_) + | SnapshotCreation(_) + | Dump(_) + | IndexSwap { .. } => None, + IndexOperation { op, .. } => Some(op.index_uid()), + IndexCreation { index_uid, .. } + | IndexUpdate { index_uid, .. } + | IndexDeletion { index_uid, .. } => Some(index_uid), + } + } +} + +impl fmt::Display for Batch { + /// A text used when we debug the profiling reports. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let index_uid = self.index_uid(); + let tasks = self.ids(); + match self { + Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?, + Batch::TaskDeletions(_) => f.write_str("TaskDeletion")?, + Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?, + Batch::Dump(_) => f.write_str("Dump")?, + Batch::IndexOperation { op, .. } => write!(f, "{op}")?, + Batch::IndexCreation { .. } => f.write_str("IndexCreation")?, + Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?, + Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?, + Batch::IndexSwap { .. } => f.write_str("IndexSwap")?, + }; + match index_uid { + Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")), + None => f.write_fmt(format_args!(" from tasks: {tasks:?}")), + } + } +} + +impl IndexOperation { + pub fn index_uid(&self) -> &str { + match self { + IndexOperation::DocumentOperation { index_uid, .. } + | IndexOperation::DocumentEdition { index_uid, .. } + | IndexOperation::DocumentDeletion { index_uid, .. } + | IndexOperation::DocumentClear { index_uid, .. } + | IndexOperation::Settings { index_uid, .. } + | IndexOperation::DocumentClearAndSetting { index_uid, .. 
} => index_uid, + } + } +} + +impl fmt::Display for IndexOperation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + IndexOperation::DocumentOperation { .. } => { + f.write_str("IndexOperation::DocumentOperation") + } + IndexOperation::DocumentEdition { .. } => { + f.write_str("IndexOperation::DocumentEdition") + } + IndexOperation::DocumentDeletion { .. } => { + f.write_str("IndexOperation::DocumentDeletion") + } + IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"), + IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"), + IndexOperation::DocumentClearAndSetting { .. } => { + f.write_str("IndexOperation::DocumentClearAndSetting") + } + } + } +} + +impl IndexScheduler { + /// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`]. + /// + /// ## Arguments + /// - `rtxn`: read transaction + /// - `index_uid`: name of the index affected by the operations of the autobatch + /// - `batch`: the result of the autobatcher + pub(crate) fn create_next_batch_index( + &self, + rtxn: &RoTxn, + index_uid: String, + batch: BatchKind, + current_batch: &mut ProcessingBatch, + must_create_index: bool, + ) -> Result> { + match batch { + BatchKind::DocumentClear { ids } => Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentClear { + tasks: self.queue.get_existing_tasks_for_processing_batch( + rtxn, + current_batch, + ids, + )?, + index_uid, + }, + must_create_index, + })), + BatchKind::DocumentEdition { id } => { + let mut task = + self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + current_batch.processing(Some(&mut task)); + match &task.kind { + KindWithContent::DocumentEdition { index_uid, .. } => { + Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentEdition { + index_uid: index_uid.clone(), + task, + }, + must_create_index: false, + })) + } + _ => unreachable!(), + } + } + BatchKind::DocumentOperation { method, operation_ids, .. 
} => { + let tasks = self.queue.get_existing_tasks_for_processing_batch( + rtxn, + current_batch, + operation_ids, + )?; + let primary_key = tasks + .iter() + .find_map(|task| match task.kind { + KindWithContent::DocumentAdditionOrUpdate { ref primary_key, .. } => { + // we want to stop on the first document addition + Some(primary_key.clone()) + } + KindWithContent::DocumentDeletion { .. } => None, + _ => unreachable!(), + }) + .flatten(); + + let mut operations = Vec::new(); + + for task in tasks.iter() { + match task.kind { + KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => { + operations.push(DocumentOperation::Add(content_file)); + } + KindWithContent::DocumentDeletion { ref documents_ids, .. } => { + operations.push(DocumentOperation::Delete(documents_ids.clone())); + } + _ => unreachable!(), + } + } + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentOperation { + index_uid, + primary_key, + method, + operations, + tasks, + }, + must_create_index, + })) + } + BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: _ } => { + let tasks = self.queue.get_existing_tasks_for_processing_batch( + rtxn, + current_batch, + deletion_ids, + )?; + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentDeletion { index_uid, tasks }, + must_create_index, + })) + } + BatchKind::Settings { settings_ids, .. } => { + let tasks = self.queue.get_existing_tasks_for_processing_batch( + rtxn, + current_batch, + settings_ids, + )?; + + let mut settings = Vec::new(); + for task in &tasks { + match task.kind { + KindWithContent::SettingsUpdate { + ref new_settings, is_deletion, .. 
+ } => settings.push((is_deletion, *new_settings.clone())), + _ => unreachable!(), + } + } + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::Settings { index_uid, settings, tasks }, + must_create_index, + })) + } + BatchKind::ClearAndSettings { other, settings_ids, allow_index_creation } => { + let (index_uid, settings, settings_tasks) = match self + .create_next_batch_index( + rtxn, + index_uid, + BatchKind::Settings { settings_ids, allow_index_creation }, + current_batch, + must_create_index, + )? + .unwrap() + { + Batch::IndexOperation { + op: IndexOperation::Settings { index_uid, settings, tasks, .. }, + .. + } => (index_uid, settings, tasks), + _ => unreachable!(), + }; + let (index_uid, cleared_tasks) = match self + .create_next_batch_index( + rtxn, + index_uid, + BatchKind::DocumentClear { ids: other }, + current_batch, + must_create_index, + )? + .unwrap() + { + Batch::IndexOperation { + op: IndexOperation::DocumentClear { index_uid, tasks }, + .. + } => (index_uid, tasks), + _ => unreachable!(), + }; + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentClearAndSetting { + index_uid, + cleared_tasks, + settings, + settings_tasks, + }, + must_create_index, + })) + } + BatchKind::IndexCreation { id } => { + let mut task = + self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + current_batch.processing(Some(&mut task)); + let (index_uid, primary_key) = match &task.kind { + KindWithContent::IndexCreation { index_uid, primary_key } => { + (index_uid.clone(), primary_key.clone()) + } + _ => unreachable!(), + }; + Ok(Some(Batch::IndexCreation { index_uid, primary_key, task })) + } + BatchKind::IndexUpdate { id } => { + let mut task = + self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + current_batch.processing(Some(&mut task)); + let primary_key = match &task.kind { + KindWithContent::IndexUpdate { primary_key, .. 
} => primary_key.clone(), + _ => unreachable!(), + }; + Ok(Some(Batch::IndexUpdate { index_uid, primary_key, task })) + } + BatchKind::IndexDeletion { ids } => Ok(Some(Batch::IndexDeletion { + index_uid, + index_has_been_created: must_create_index, + tasks: self.queue.get_existing_tasks_for_processing_batch( + rtxn, + current_batch, + ids, + )?, + })), + BatchKind::IndexSwap { id } => { + let mut task = + self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + current_batch.processing(Some(&mut task)); + Ok(Some(Batch::IndexSwap { task })) + } + } + } + + /// Create the next batch to be processed; + /// 1. We get the *last* task to cancel. + /// 2. We get the *next* task to delete. + /// 3. We get the *next* snapshot to process. + /// 4. We get the *next* dump to process. + /// 5. We get the *next* tasks to process for a specific index. + #[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")] + pub(crate) fn create_next_batch( + &self, + rtxn: &RoTxn, + ) -> Result> { + #[cfg(test)] + self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?; + + let batch_id = self.queue.batches.next_batch_id(rtxn)?; + let mut current_batch = ProcessingBatch::new(batch_id); + + let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?; + let to_cancel = self.queue.tasks.get_kind(rtxn, Kind::TaskCancelation)? & enqueued; + + // 1. we get the last task to cancel. + if let Some(task_id) = to_cancel.max() { + let mut task = + self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + current_batch.processing(Some(&mut task)); + return Ok(Some((Batch::TaskCancelation { task }, current_batch))); + } + + // 2. we get the next task to delete + let to_delete = self.queue.tasks.get_kind(rtxn, Kind::TaskDeletion)? 
& enqueued; + if !to_delete.is_empty() { + let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_delete)?; + current_batch.processing(&mut tasks); + return Ok(Some((Batch::TaskDeletions(tasks), current_batch))); + } + + // 3. we batch the snapshot. + let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued; + if !to_snapshot.is_empty() { + let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?; + current_batch.processing(&mut tasks); + return Ok(Some((Batch::SnapshotCreation(tasks), current_batch))); + } + + // 4. we batch the dumps. + let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued; + if let Some(to_dump) = to_dump.min() { + let mut task = + self.queue.tasks.get_task(rtxn, to_dump)?.ok_or(Error::CorruptedTaskQueue)?; + current_batch.processing(Some(&mut task)); + return Ok(Some((Batch::Dump(task), current_batch))); + } + + // 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task. + let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) }; + let mut task = + self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + + // If the task is not associated with any index, verify that it is an index swap and + // create the batch directly. 
Otherwise, get the index name associated with the task + // and use the autobatcher to batch the enqueued tasks associated with it + + let index_name = if let Some(&index_name) = task.indexes().first() { + index_name + } else { + assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty())); + current_batch.processing(Some(&mut task)); + return Ok(Some((Batch::IndexSwap { task }, current_batch))); + }; + + let index_already_exists = self.index_mapper.exists(rtxn, index_name)?; + let mut primary_key = None; + if index_already_exists { + let index = self.index_mapper.index(rtxn, index_name)?; + let rtxn = index.read_txn()?; + primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string()); + } + + let index_tasks = self.queue.tasks.index_tasks(rtxn, index_name)? & enqueued; + + // If autobatching is disabled we only take one task at a time. + // Otherwise, we take only a maximum of tasks to create batches. + let tasks_limit = if self.scheduler.autobatching_enabled { + self.scheduler.max_number_of_batched_tasks + } else { + 1 + }; + + let mut enqueued = Vec::new(); + let mut total_size: u64 = 0; + for task_id in index_tasks.into_iter().take(tasks_limit) { + let task = self + .queue + .tasks + .get_task(rtxn, task_id) + .and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?; + + if let Some(uuid) = task.content_uuid() { + let content_size = self.queue.file_store.compute_size(uuid)?; + total_size = total_size.saturating_add(content_size); + } + + if total_size > self.scheduler.batched_tasks_size_limit && !enqueued.is_empty() { + break; + } + + enqueued.push((task.uid, task.kind)); + } + + if let Some((batchkind, create_index)) = + autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref()) + { + return Ok(self + .create_next_batch_index( + rtxn, + index_name.to_string(), + batchkind, + &mut current_batch, + create_index, + )? 
+ .map(|batch| (batch, current_batch))); + } + + // If we found no tasks then we were notified for something that got autobatched + // somehow and there is nothing to do. + Ok(None) + } +} diff --git a/crates/index-scheduler/src/scheduler/mod.rs b/crates/index-scheduler/src/scheduler/mod.rs new file mode 100644 index 000000000..2d20c4d55 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/mod.rs @@ -0,0 +1,349 @@ +mod autobatcher; +#[cfg(test)] +mod autobatcher_test; +mod create_batch; +mod process_batch; +mod process_dump_creation; +mod process_index_operation; +mod process_snapshot_creation; +#[cfg(test)] +mod test; +#[cfg(test)] +mod test_document_addition; +#[cfg(test)] +mod test_embedders; +#[cfg(test)] +mod test_failure; + +use std::path::PathBuf; +use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; +use std::sync::Arc; + +use meilisearch_types::error::ResponseError; +use meilisearch_types::milli; +use meilisearch_types::tasks::Status; +use rayon::current_num_threads; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use roaring::RoaringBitmap; +use synchronoise::SignalEvent; + +use crate::processing::{AtomicTaskStep, BatchProgress}; +use crate::{Error, IndexScheduler, IndexSchedulerOptions, Result, TickOutcome}; + +#[derive(Default, Clone, Debug)] +pub struct MustStopProcessing(Arc); + +impl MustStopProcessing { + pub fn get(&self) -> bool { + self.0.load(Ordering::Relaxed) + } + + pub fn must_stop(&self) { + self.0.store(true, Ordering::Relaxed); + } + + pub fn reset(&self) { + self.0.store(false, Ordering::Relaxed); + } +} + +pub struct Scheduler { + /// A boolean that can be set to true to stop the currently processing tasks. + pub must_stop_processing: MustStopProcessing, + + /// Get a signal when a batch needs to be processed. + pub(crate) wake_up: Arc, + + /// Whether auto-batching is enabled or not. + pub(crate) autobatching_enabled: bool, + + /// The maximum number of tasks that will be batched together. 
+ pub(crate) max_number_of_batched_tasks: usize, + + /// The maximum size, in bytes, of tasks in a batch. + pub(crate) batched_tasks_size_limit: u64, + + /// The path used to create the dumps. + pub(crate) dumps_path: PathBuf, + + /// The path used to create the snapshots. + pub(crate) snapshots_path: PathBuf, + + /// The path to the folder containing the auth LMDB env. + pub(crate) auth_path: PathBuf, + + /// The path to the version file of Meilisearch. + pub(crate) version_file_path: PathBuf, +} + +impl Scheduler { + pub(crate) fn private_clone(&self) -> Scheduler { + Scheduler { + must_stop_processing: self.must_stop_processing.clone(), + wake_up: self.wake_up.clone(), + autobatching_enabled: self.autobatching_enabled, + max_number_of_batched_tasks: self.max_number_of_batched_tasks, + batched_tasks_size_limit: self.batched_tasks_size_limit, + dumps_path: self.dumps_path.clone(), + snapshots_path: self.snapshots_path.clone(), + auth_path: self.auth_path.clone(), + version_file_path: self.version_file_path.clone(), + } + } + + pub fn new(options: &IndexSchedulerOptions) -> Scheduler { + Scheduler { + must_stop_processing: MustStopProcessing::default(), + // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things + wake_up: Arc::new(SignalEvent::auto(true)), + autobatching_enabled: options.autobatching_enabled, + max_number_of_batched_tasks: options.max_number_of_batched_tasks, + batched_tasks_size_limit: options.batched_tasks_size_limit, + dumps_path: options.dumps_path.clone(), + snapshots_path: options.snapshots_path.clone(), + auth_path: options.auth_path.clone(), + version_file_path: options.version_file_path.clone(), + } + } +} + +impl IndexScheduler { + /// Perform one iteration of the run loop. + /// + /// 1. See if we need to cleanup the task queue + /// 2. Find the next batch of tasks to be processed. + /// 3. Update the information of these tasks following the start of their processing. + /// 4. 
Update the in-memory list of processed tasks accordingly. + /// 5. Process the batch: + /// - perform the actions of each batched task + /// - update the information of each batched task following the end + /// of their processing. + /// 6. Reset the in-memory list of processed tasks. + /// + /// Returns the number of processed tasks. + pub(crate) fn tick(&self) -> Result { + #[cfg(test)] + { + *self.run_loop_iteration.write().unwrap() += 1; + self.breakpoint(crate::test_utils::Breakpoint::Start); + } + + if self.cleanup_enabled { + let mut wtxn = self.env.write_txn()?; + self.queue.cleanup_task_queue(&mut wtxn)?; + wtxn.commit()?; + } + + let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; + let (batch, mut processing_batch) = + match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? { + Some(batch) => batch, + None => return Ok(TickOutcome::WaitForSignal), + }; + let index_uid = batch.index_uid().map(ToOwned::to_owned); + drop(rtxn); + + // 1. store the starting date with the bitmap of processing tasks. + let mut ids = batch.ids(); + let processed_tasks = ids.len(); + + // We reset the must_stop flag to be sure that we don't stop processing tasks + self.scheduler.must_stop_processing.reset(); + let progress = self + .processing_tasks + .write() + .unwrap() + // We can clone the processing batch here because we don't want its modification to affect the view of the processing batches + .start_processing(processing_batch.clone(), ids.clone()); + + #[cfg(test)] + self.breakpoint(crate::test_utils::Breakpoint::BatchCreated); + + // 2. 
Process the tasks + let res = { + let cloned_index_scheduler = self.private_clone(); + let processing_batch = &mut processing_batch; + let progress = progress.clone(); + std::thread::scope(|s| { + let handle = std::thread::Builder::new() + .name(String::from("batch-operation")) + .spawn_scoped(s, move || { + cloned_index_scheduler.process_batch(batch, processing_batch, progress) + }) + .unwrap(); + handle.join().unwrap_or(Err(Error::ProcessBatchPanicked)) + }) + }; + + // Reset the currently updating index to relinquish the index handle + self.index_mapper.set_currently_updating_index(None); + + #[cfg(test)] + self.maybe_fail(crate::test_utils::FailureLocation::AcquiringWtxn)?; + + progress.update_progress(BatchProgress::WritingTasksToDisk); + processing_batch.finished(); + let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; + let mut canceled = RoaringBitmap::new(); + + match res { + Ok(tasks) => { + #[cfg(test)] + self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded); + + let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32); + progress.update_progress(task_progress_obj); + let mut success = 0; + let mut failure = 0; + let mut canceled_by = None; + + #[allow(unused_variables)] + for (i, mut task) in tasks.into_iter().enumerate() { + task_progress.fetch_add(1, Ordering::Relaxed); + processing_batch.update(&mut task); + if task.status == Status::Canceled { + canceled.insert(task.uid); + canceled_by = task.canceled_by; + } + + #[cfg(test)] + self.maybe_fail( + crate::test_utils::FailureLocation::UpdatingTaskAfterProcessBatchSuccess { + task_uid: i as u32, + }, + )?; + + match task.error { + Some(_) => failure += 1, + None => success += 1, + } + + self.queue + .tasks + .update_task(&mut wtxn, &task) + .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; + } + if let Some(canceled_by) = canceled_by { + self.queue.tasks.canceled_by.put(&mut wtxn, &canceled_by, &canceled)?; + } + tracing::info!("A batch of 
tasks was successfully completed with {success} successful tasks and {failure} failed tasks."); + } + // If we have an abortion error we must stop the tick here and re-schedule tasks. + Err(Error::Milli { + error: milli::Error::InternalError(milli::InternalError::AbortedIndexation), + .. + }) + | Err(Error::AbortedTask) => { + #[cfg(test)] + self.breakpoint(crate::test_utils::Breakpoint::AbortedIndexation); + wtxn.abort(); + + tracing::info!("A batch of tasks was aborted."); + // We make sure that we don't call `stop_processing` on the `processing_tasks`, + // this is because we want to let the next tick call `create_next_batch` and keep + // the `started_at` date times and `processings` of the current processing tasks. + // This date time is used by the task cancelation to store the right `started_at` + // date in the task on disk. + return Ok(TickOutcome::TickAgain(0)); + } + // If an index said it was full, we need to: + // 1. identify which index is full + // 2. close the associated environment + // 3. resize it + // 4. re-schedule tasks + Err(Error::Milli { + error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached), + .. + }) if index_uid.is_some() => { + // fixme: add index_uid to match to avoid the unwrap + let index_uid = index_uid.unwrap(); + // fixme: handle error more gracefully? not sure when this could happen + self.index_mapper.resize_index(&wtxn, &index_uid)?; + wtxn.abort(); + + tracing::info!("The max database size was reached. Resizing the index."); + + return Ok(TickOutcome::TickAgain(0)); + } + // In case of a failure we must get back and patch all the tasks with the error. 
+ Err(err) => { + #[cfg(test)] + self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchFailed); + let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32); + progress.update_progress(task_progress_obj); + + let error: ResponseError = err.into(); + for id in ids.iter() { + task_progress.fetch_add(1, Ordering::Relaxed); + let mut task = self + .queue + .tasks + .get_task(&wtxn, id) + .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))? + .ok_or(Error::CorruptedTaskQueue)?; + task.status = Status::Failed; + task.error = Some(error.clone()); + task.details = task.details.map(|d| d.to_failed()); + processing_batch.update(&mut task); + + #[cfg(test)] + self.maybe_fail( + crate::test_utils::FailureLocation::UpdatingTaskAfterProcessBatchFailure, + )?; + + tracing::error!("Batch failed {}", error); + + self.queue + .tasks + .update_task(&mut wtxn, &task) + .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; + } + } + } + + // We must re-add the canceled task so they're part of the same batch. + ids |= canceled; + self.queue.write_batch(&mut wtxn, processing_batch, &ids)?; + + #[cfg(test)] + self.maybe_fail(crate::test_utils::FailureLocation::CommittingWtxn)?; + + wtxn.commit().map_err(Error::HeedTransaction)?; + + // We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task + // and then become « not found » for some time until the commit everything is written and the final commit is made. + self.processing_tasks.write().unwrap().stop_processing(); + + // Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart + tracing::debug!("Deleting the update files"); + + //We take one read transaction **per thread**. 
Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap + let idx = AtomicU32::new(0); + (0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> { + let rtxn = self.read_txn()?; + while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) { + let task = self + .queue + .tasks + .get_task(&rtxn, id) + .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))? + .ok_or(Error::CorruptedTaskQueue)?; + if let Err(e) = self.queue.delete_persisted_task_data(&task) { + tracing::error!( + "Failure to delete the content files associated with task {}. Error: {e}", + task.uid + ); + } + } + Ok(()) + })?; + + // We shouldn't crash the tick function if we can't send data to the webhook. + let _ = self.notify_webhook(&ids); + + #[cfg(test)] + self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing); + + Ok(TickOutcome::TickAgain(processed_tasks)) + } +} diff --git a/crates/index-scheduler/src/scheduler/process_batch.rs b/crates/index-scheduler/src/scheduler/process_batch.rs new file mode 100644 index 000000000..9a86939a4 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/process_batch.rs @@ -0,0 +1,581 @@ +use std::collections::{BTreeSet, HashMap, HashSet}; +use std::sync::atomic::Ordering; + +use meilisearch_types::batches::BatchId; +use meilisearch_types::heed::{RoTxn, RwTxn}; +use meilisearch_types::milli::progress::Progress; +use meilisearch_types::milli::{self}; +use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task}; +use milli::update::Settings as MilliSettings; +use roaring::RoaringBitmap; + +use super::create_batch::Batch; +use crate::processing::{ + AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, + InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress, + UpdateIndexProgress, VariableNameStep, +}; +use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch}; +use 
crate::{Error, IndexScheduler, Result, TaskId};
+
+// NOTE(review): several generic argument lists look truncated in this copy of the patch
+// (e.g. `Result> {`, `-> Result {`, `HashMap = HashMap::new()`); confirm the exact types
+// against the real file before relying on the signatures below.
+impl IndexScheduler {
+    /// Apply the operation associated with the given batch.
+    ///
+    /// `current_batch` is forwarded to the task-cancelation path and to the recursive call made
+    /// for index creation. `progress` receives the step updates of whichever operation runs.
+    ///
+    /// ## Return
+    /// The list of tasks that were processed. The metadata of each task in the returned
+    /// list is updated accordingly, with the exception of its date fields
+    /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
+    #[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))]
+    pub(crate) fn process_batch(
+        &self,
+        batch: Batch,
+        current_batch: &mut ProcessingBatch,
+        progress: Progress,
+    ) -> Result> {
+        // Test-only hooks: injected failures and a breakpoint at the start of processing.
+        #[cfg(test)]
+        {
+            self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?;
+            self.maybe_fail(crate::test_utils::FailureLocation::PanicInsideProcessBatch)?;
+            self.breakpoint(crate::test_utils::Breakpoint::InsideProcessBatch);
+        }
+
+        match batch {
+            Batch::TaskCancelation { mut task } => {
+                // 1. Retrieve the tasks that matched the query at enqueue-time.
+                let matched_tasks =
+                    if let KindWithContent::TaskCancelation { tasks, query: _ } = &task.kind {
+                        tasks
+                    } else {
+                        unreachable!()
+                    };
+
+                let rtxn = self.env.read_txn()?;
+                let mut canceled_tasks = self.cancel_matched_tasks(
+                    &rtxn,
+                    task.uid,
+                    current_batch,
+                    matched_tasks,
+                    &progress,
+                )?;
+
+                // 2. Record how many tasks were canceled in the cancelation task's own details.
+                task.status = Status::Succeeded;
+                match &mut task.details {
+                    Some(Details::TaskCancelation {
+                        matched_tasks: _,
+                        canceled_tasks: canceled_tasks_details,
+                        original_filter: _,
+                    }) => {
+                        *canceled_tasks_details = Some(canceled_tasks.len() as u64);
+                    }
+                    _ => unreachable!(),
+                }
+
+                // The cancelation task itself is returned after the tasks it canceled.
+                canceled_tasks.push(task);
+
+                Ok(canceled_tasks)
+            }
+            Batch::TaskDeletions(mut tasks) => {
+                // 1. Retrieve the tasks that matched the query at enqueue-time.
+
+                let mut matched_tasks = RoaringBitmap::new();
+
+                for task in tasks.iter() {
+                    if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind {
+                        matched_tasks |= tasks;
+                    } else {
+                        unreachable!()
+                    }
+                }
+
+                // 2. Delete the union of all the matched tasks in a single write transaction.
+                let mut wtxn = self.env.write_txn()?;
+                let mut deleted_tasks =
+                    self.delete_matched_tasks(&mut wtxn, &matched_tasks, &progress)?;
+                wtxn.commit()?;
+
+                // 3. Attribute the deleted ids to the deletion tasks, in order: ids counted for
+                //    one task are removed from `deleted_tasks` so a later task of the batch
+                //    does not count them again.
+                for task in tasks.iter_mut() {
+                    task.status = Status::Succeeded;
+                    let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else {
+                        unreachable!()
+                    };
+
+                    let deleted_tasks_count = deleted_tasks.intersection_len(tasks);
+                    deleted_tasks -= tasks;
+
+                    match &mut task.details {
+                        Some(Details::TaskDeletion {
+                            matched_tasks: _,
+                            deleted_tasks,
+                            original_filter: _,
+                        }) => {
+                            *deleted_tasks = Some(deleted_tasks_count);
+                        }
+                        _ => unreachable!(),
+                    }
+                }
+                Ok(tasks)
+            }
+            Batch::SnapshotCreation(tasks) => self.process_snapshot(progress, tasks),
+            Batch::Dump(task) => self.process_dump_creation(progress, task),
+            Batch::IndexOperation { op, must_create_index } => {
+                let index_uid = op.index_uid().to_string();
+                let index = if must_create_index {
+                    // create the index if it doesn't already exist
+                    let wtxn = self.env.write_txn()?;
+                    self.index_mapper.create_index(wtxn, &index_uid, None)?
+                } else {
+                    let rtxn = self.env.read_txn()?;
+                    self.index_mapper.index(&rtxn, &index_uid)?
+                };
+
+                // the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
+                self.index_mapper
+                    .set_currently_updating_index(Some((index_uid.clone(), index.clone())));
+
+                let mut index_wtxn = index.write_txn()?;
+                let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;
+
+                {
+                    // The commit gets its own tracing span.
+                    let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
+                    let _entered = span.enter();
+
+                    index_wtxn.commit()?;
+                }
+
+                // if the update processed successfully, we're going to store the new
+                // stats of the index. Since the tasks have already been processed and
+                // this is a non-critical operation. If it fails, we should not fail
+                // the entire batch.
+                let res = || -> Result<()> {
+                    let index_rtxn = index.read_txn()?;
+                    let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
+                        .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
+                    let mut wtxn = self.env.write_txn()?;
+                    self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
+                    wtxn.commit()?;
+                    Ok(())
+                }();
+
+                // A failure to store the stats is only logged.
+                match res {
+                    Ok(_) => (),
+                    Err(e) => tracing::error!(
+                        error = &e as &dyn std::error::Error,
+                        "Could not write the stats of the index"
+                    ),
+                }
+
+                Ok(tasks)
+            }
+            Batch::IndexCreation { index_uid, primary_key, task } => {
+                progress.update_progress(CreateIndexProgress::CreatingTheIndex);
+
+                // Refuse to create an index that already exists; otherwise the write
+                // transaction is handed over to `create_index`.
+                let wtxn = self.env.write_txn()?;
+                if self.index_mapper.exists(&wtxn, &index_uid)? {
+                    return Err(Error::IndexAlreadyExists(index_uid));
+                }
+                self.index_mapper.create_index(wtxn, &index_uid, None)?;
+
+                // The rest of the work (primary key, task status, stats) is shared with the
+                // index update path.
+                self.process_batch(
+                    Batch::IndexUpdate { index_uid, primary_key, task },
+                    current_batch,
+                    progress,
+                )
+            }
+            Batch::IndexUpdate { index_uid, primary_key, mut task } => {
+                progress.update_progress(UpdateIndexProgress::UpdatingTheIndex);
+                let rtxn = self.env.read_txn()?;
+                let index = self.index_mapper.index(&rtxn, &index_uid)?;
+
+                // Only touch the index settings when a primary key was provided.
+                if let Some(primary_key) = primary_key.clone() {
+                    let mut index_wtxn = index.write_txn()?;
+                    let mut builder = MilliSettings::new(
+                        &mut index_wtxn,
+                        &index,
+                        self.index_mapper.indexer_config(),
+                    );
+                    builder.set_primary_key(primary_key);
+                    let must_stop_processing = self.scheduler.must_stop_processing.clone();
+                    builder
+                        .execute(
+                            |indexing_step| tracing::debug!(update = ?indexing_step),
+                            || must_stop_processing.get(),
+                        )
+                        .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
+                    index_wtxn.commit()?;
+                }
+
+                // drop rtxn before starting a new wtxn on the same db
+                rtxn.commit()?;
+
+                task.status = Status::Succeeded;
+                task.details = Some(Details::IndexInfo { primary_key });
+
+                // if the update processed successfully, we're going to store the new
+                // stats of the index. Since the tasks have already been processed and
+                // this is a non-critical operation. If it fails, we should not fail
+                // the entire batch.
+                let res = || -> Result<()> {
+                    let mut wtxn = self.env.write_txn()?;
+                    let index_rtxn = index.read_txn()?;
+                    let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
+                        .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
+                    self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
+                    wtxn.commit()?;
+                    Ok(())
+                }();
+
+                // A failure to store the stats is only logged.
+                match res {
+                    Ok(_) => (),
+                    Err(e) => tracing::error!(
+                        error = &e as &dyn std::error::Error,
+                        "Could not write the stats of the index"
+                    ),
+                }
+
+                Ok(vec![task])
+            }
+            Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
+                progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
+                let wtxn = self.env.write_txn()?;
+
+                // it's possible that the index doesn't exist
+                // (any error while opening or counting falls back to the default count)
+                let number_of_documents = || -> Result {
+                    let index = self.index_mapper.index(&wtxn, &index_uid)?;
+                    let index_rtxn = index.read_txn()?;
+                    index
+                        .number_of_documents(&index_rtxn)
+                        .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))
+                }()
+                .unwrap_or_default();
+
+                // The write transaction is directly owned and committed inside.
+                // A missing index is tolerated only when `index_has_been_created` is set.
+                match self.index_mapper.delete_index(wtxn, &index_uid) {
+                    Ok(()) => (),
+                    Err(Error::IndexNotFound(_)) if index_has_been_created => (),
+                    Err(e) => return Err(e),
+                }
+
+                // We set all the tasks details to the default value.
+                // Index deletion tasks additionally report the number of documents counted above.
+                for task in &mut tasks {
+                    task.status = Status::Succeeded;
+                    task.details = match &task.kind {
+                        KindWithContent::IndexDeletion { .. } => {
+                            Some(Details::ClearAll { deleted_documents: Some(number_of_documents) })
+                        }
+                        otherwise => otherwise.default_finished_details(),
+                    };
+                }
+
+                Ok(tasks)
+            }
+            Batch::IndexSwap { mut task } => {
+                progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
+
+                let mut wtxn = self.env.write_txn()?;
+                let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind {
+                    swaps
+                } else {
+                    unreachable!()
+                };
+                // Collect every missing index first so the error can name all of them.
+                let mut not_found_indexes = BTreeSet::new();
+                for IndexSwap { indexes: (lhs, rhs) } in swaps {
+                    for index in [lhs, rhs] {
+                        let index_exists = self.index_mapper.index_exists(&wtxn, index)?;
+                        if !index_exists {
+                            not_found_indexes.insert(index);
+                        }
+                    }
+                }
+                if !not_found_indexes.is_empty() {
+                    if not_found_indexes.len() == 1 {
+                        return Err(Error::SwapIndexNotFound(
+                            not_found_indexes.into_iter().next().unwrap().clone(),
+                        ));
+                    } else {
+                        return Err(Error::SwapIndexesNotFound(
+                            not_found_indexes.into_iter().cloned().collect(),
+                        ));
+                    }
+                }
+                // All the swaps are applied within the same write transaction, which is
+                // committed only once every swap succeeded.
+                progress.update_progress(SwappingTheIndexes::SwappingTheIndexes);
+                for (step, swap) in swaps.iter().enumerate() {
+                    progress.update_progress(VariableNameStep::new(
+                        format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1),
+                        step as u32,
+                        swaps.len() as u32,
+                    ));
+                    self.apply_index_swap(
+                        &mut wtxn,
+                        &progress,
+                        task.uid,
+                        &swap.indexes.0,
+                        &swap.indexes.1,
+                    )?;
+                }
+                wtxn.commit()?;
+                task.status = Status::Succeeded;
+                Ok(vec![task])
+            }
+        }
+    }
+
+    /// Swap the index `lhs` with the index `rhs`.
+    ///
+    /// Only the tasks with an id lower than `task_id` (the id of the swap task) are rewritten
+    /// and moved between the two indexes. Nothing is committed here: all writes go through
+    /// the caller's `wtxn`.
+    fn apply_index_swap(
+        &self,
+        wtxn: &mut RwTxn,
+        progress: &Progress,
+        task_id: u32,
+        lhs: &str,
+        rhs: &str,
+    ) -> Result<()> {
+        progress.update_progress(InnerSwappingTwoIndexes::RetrieveTheTasks);
+        // 1. Verify that both lhs and rhs are existing indexes
+        let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?;
+        if !index_lhs_exists {
+            return Err(Error::IndexNotFound(lhs.to_owned()));
+        }
+        let index_rhs_exists = self.index_mapper.index_exists(wtxn, rhs)?;
+        if !index_rhs_exists {
+            return Err(Error::IndexNotFound(rhs.to_owned()));
+        }
+
+        // 2. Get the task set for index = name that appeared before the index swap task
+        //    (`remove_range(task_id..)` drops the swap task itself and every later id)
+        let mut index_lhs_task_ids = self.queue.tasks.index_tasks(wtxn, lhs)?;
+        index_lhs_task_ids.remove_range(task_id..);
+        let mut index_rhs_task_ids = self.queue.tasks.index_tasks(wtxn, rhs)?;
+        index_rhs_task_ids.remove_range(task_id..);
+
+        // 3. before_name -> new_name in the task's KindWithContent
+        progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks);
+        let tasks_to_update = &index_lhs_task_ids | &index_rhs_task_ids;
+        let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u32);
+        progress.update_progress(task_progress);
+
+        for task_id in tasks_to_update {
+            let mut task =
+                self.queue.tasks.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
+            swap_index_uid_in_task(&mut task, (lhs, rhs));
+            self.queue.tasks.all_tasks.put(wtxn, &task_id, &task)?;
+            atomic.fetch_add(1, Ordering::Relaxed);
+        }
+
+        // 4. remove the task from indexuid = before_name
+        // 5. add the task to indexuid = after_name
+        progress.update_progress(InnerSwappingTwoIndexes::UpdateTheIndexesMetadata);
+        self.queue.tasks.update_index(wtxn, lhs, |lhs_tasks| {
+            *lhs_tasks -= &index_lhs_task_ids;
+            *lhs_tasks |= &index_rhs_task_ids;
+        })?;
+        self.queue.tasks.update_index(wtxn, rhs, |rhs_tasks| {
+            *rhs_tasks -= &index_rhs_task_ids;
+            *rhs_tasks |= &index_lhs_task_ids;
+        })?;
+
+        // 6. Swap in the index mapper
+        self.index_mapper.swap(wtxn, lhs, rhs)?;
+
+        Ok(())
+    }
+
+    /// Delete each given task from all the databases (if it is deletable).
+    ///
+    /// Tasks that are enqueued or currently processing are never deleted, and ids in
+    /// `matched_tasks` that are not in the task queue are ignored.
+    ///
+    /// Returns the set of ids of the tasks that were actually deleted. Nothing is committed
+    /// here: all writes go through the caller's `wtxn`.
+    fn delete_matched_tasks(
+        &self,
+        wtxn: &mut RwTxn,
+        matched_tasks: &RoaringBitmap,
+        progress: &Progress,
+    ) -> Result {
+        progress.update_progress(TaskDeletionProgress::DeletingTasksDateTime);
+
+        // 1. Remove from this list the tasks that we are not allowed to delete
+        let enqueued_tasks = self.queue.tasks.get_status(wtxn, Status::Enqueued)?;
+        let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
+
+        let all_task_ids = self.queue.tasks.all_task_ids(wtxn)?;
+        let mut to_delete_tasks = all_task_ids & matched_tasks;
+        to_delete_tasks -= &**processing_tasks;
+        to_delete_tasks -= &enqueued_tasks;
+
+        // 2. We now have a list of tasks to delete, delete them
+
+        // Everything the deleted tasks were referenced by; used below to clean up the
+        // reverse indexes of both the tasks and the batches.
+        let mut affected_indexes = HashSet::new();
+        let mut affected_statuses = HashSet::new();
+        let mut affected_kinds = HashSet::new();
+        let mut affected_canceled_by = RoaringBitmap::new();
+        // The tasks that have been removed *per batches*.
+        let mut affected_batches: HashMap = HashMap::new();
+
+        // 2.1. Remove the tasks from the date-time databases and collect what they affect.
+        let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
+        progress.update_progress(task_progress);
+        for task_id in to_delete_tasks.iter() {
+            let task =
+                self.queue.tasks.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
+
+            affected_indexes.extend(task.indexes().into_iter().map(|x| x.to_owned()));
+            affected_statuses.insert(task.status);
+            affected_kinds.insert(task.kind.as_kind());
+            // Note: don't delete the persisted task data since
+            // we can only delete succeeded, failed, and canceled tasks.
+            // In each of those cases, the persisted data is supposed to
+            // have been deleted already.
+            utils::remove_task_datetime(
+                wtxn,
+                self.queue.tasks.enqueued_at,
+                task.enqueued_at,
+                task.uid,
+            )?;
+            if let Some(started_at) = task.started_at {
+                utils::remove_task_datetime(
+                    wtxn,
+                    self.queue.tasks.started_at,
+                    started_at,
+                    task.uid,
+                )?;
+            }
+            if let Some(finished_at) = task.finished_at {
+                utils::remove_task_datetime(
+                    wtxn,
+                    self.queue.tasks.finished_at,
+                    finished_at,
+                    task.uid,
+                )?;
+            }
+            if let Some(canceled_by) = task.canceled_by {
+                affected_canceled_by.insert(canceled_by);
+            }
+            if let Some(batch_uid) = task.batch_uid {
+                affected_batches.entry(batch_uid).or_default().insert(task_id);
+            }
+            atomic_progress.fetch_add(1, Ordering::Relaxed);
+        }
+
+        // 2.2. Remove the tasks from the index, status and kind reverse indexes.
+        progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata);
+        let (atomic_progress, task_progress) = AtomicTaskStep::new(
+            (affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32,
+        );
+        progress.update_progress(task_progress);
+        for index in affected_indexes.iter() {
+            self.queue.tasks.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?;
+            atomic_progress.fetch_add(1, Ordering::Relaxed);
+        }
+
+        for status in affected_statuses.iter() {
+            self.queue.tasks.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?;
+            atomic_progress.fetch_add(1, Ordering::Relaxed);
+        }
+
+        for kind in affected_kinds.iter() {
+            self.queue.tasks.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?;
+            atomic_progress.fetch_add(1, Ordering::Relaxed);
+        }
+
+        // 2.3. Remove the tasks themselves, then fix the `canceled_by` entries that
+        //      referenced them (an entry left empty is deleted).
+        progress.update_progress(TaskDeletionProgress::DeletingTasks);
+        let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
+        progress.update_progress(task_progress);
+        for task in to_delete_tasks.iter() {
+            self.queue.tasks.all_tasks.delete(wtxn, &task)?;
+            atomic_progress.fetch_add(1, Ordering::Relaxed);
+        }
+        for canceled_by in affected_canceled_by {
+            if let Some(mut tasks) = self.queue.tasks.canceled_by.get(wtxn, &canceled_by)? {
+                tasks -= &to_delete_tasks;
+                if tasks.is_empty() {
+                    self.queue.tasks.canceled_by.delete(wtxn, &canceled_by)?;
+                } else {
+                    self.queue.tasks.canceled_by.put(wtxn, &canceled_by, &tasks)?;
+                }
+            }
+        }
+        // 2.4. Clean up the batches the deleted tasks belonged to. Inside the loop,
+        //      `to_delete_tasks` is shadowed by the ids deleted from that one batch.
+        progress.update_progress(TaskDeletionProgress::DeletingBatches);
+        let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32);
+        progress.update_progress(batch_progress);
+        for (batch_id, to_delete_tasks) in affected_batches {
+            if let Some(mut tasks) = self.queue.batch_to_tasks_mapping.get(wtxn, &batch_id)? {
+                tasks -= &to_delete_tasks;
+                // We must remove the batch entirely
+                if tasks.is_empty() {
+                    self.queue.batches.all_batches.delete(wtxn, &batch_id)?;
+                    self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?;
+                }
+                // NOTE(review): when `tasks` is not empty, the reduced bitmap is not written
+                // back to `batch_to_tasks_mapping` here (unlike `canceled_by` above) — confirm
+                // this is intended.
+
+                // Anyway, we must remove the batch from all its reverse indexes.
+                // The only way to do that is to check, for each affected index, status and
+                // kind, whether any task remaining in the batch still belongs to it.
+
+                for index in affected_indexes.iter() {
+                    let index_tasks = self.queue.tasks.index_tasks(wtxn, index)?;
+                    let remaining_index_tasks = index_tasks & &tasks;
+                    if remaining_index_tasks.is_empty() {
+                        self.queue.batches.update_index(wtxn, index, |bitmap| {
+                            bitmap.remove(batch_id);
+                        })?;
+                    }
+                }
+
+                for status in affected_statuses.iter() {
+                    let status_tasks = self.queue.tasks.get_status(wtxn, *status)?;
+                    let remaining_status_tasks = status_tasks & &tasks;
+                    if remaining_status_tasks.is_empty() {
+                        self.queue.batches.update_status(wtxn, *status, |bitmap| {
+                            bitmap.remove(batch_id);
+                        })?;
+                    }
+                }
+
+                for kind in affected_kinds.iter() {
+                    let kind_tasks = self.queue.tasks.get_kind(wtxn, *kind)?;
+                    let remaining_kind_tasks = kind_tasks & &tasks;
+                    if remaining_kind_tasks.is_empty() {
+                        self.queue.batches.update_kind(wtxn, *kind, |bitmap| {
+                            bitmap.remove(batch_id);
+                        })?;
+                    }
+                }
+            }
+            atomic_progress.fetch_add(1, Ordering::Relaxed);
+        }
+
+        Ok(to_delete_tasks)
+    }
+
+    /// Cancel each given task from all the databases (if it is cancelable).
+    ///
+    /// Only the tasks that are both enqueued and part of `matched_tasks` are canceled. The
+    /// tasks are updated in memory only (this function receives a read transaction), and each
+    /// one is registered in `current_batch`.
+    ///
+    /// Returns the list of tasks that matched the filter and must be written in the database.
+    fn cancel_matched_tasks(
+        &self,
+        rtxn: &RoTxn,
+        cancel_task_id: TaskId,
+        current_batch: &mut ProcessingBatch,
+        matched_tasks: &RoaringBitmap,
+        progress: &Progress,
+    ) -> Result> {
+        progress.update_progress(TaskCancelationProgress::RetrievingTasks);
+
+        // 1. Remove from this list the tasks that we are not allowed to cancel
+        //    Notice that only the _enqueued_ ones are cancelable and we should
+        //    have already aborted the indexation of the _processing_ ones
+        let cancelable_tasks = self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
+        let tasks_to_cancel = cancelable_tasks & matched_tasks;
+
+        let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
+        progress.update_progress(progress_obj);
+
+        // 2. We now have a list of tasks to cancel, cancel them
+        //    (the `inspect` call only advances the progress counter as ids are consumed)
+        let mut tasks = self.queue.tasks.get_existing_tasks(
+            rtxn,
+            tasks_to_cancel.iter().inspect(|_| {
+                task_progress.fetch_add(1, Ordering::Relaxed);
+            }),
+        )?;
+
+        // 3. Mark every retrieved task as canceled by `cancel_task_id`.
+        progress.update_progress(TaskCancelationProgress::UpdatingTasks);
+        let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
+        progress.update_progress(progress_obj);
+        for task in tasks.iter_mut() {
+            task.status = Status::Canceled;
+            task.canceled_by = Some(cancel_task_id);
+            task.details = task.details.as_ref().map(|d| d.to_failed());
+            current_batch.processing(Some(task));
+            task_progress.fetch_add(1, Ordering::Relaxed);
+        }
+
+        Ok(tasks)
+    }
+}
diff --git a/crates/index-scheduler/src/scheduler/process_dump_creation.rs b/crates/index-scheduler/src/scheduler/process_dump_creation.rs
new file mode 100644
index 000000000..3fd5c795b
--- /dev/null
+++ b/crates/index-scheduler/src/scheduler/process_dump_creation.rs
@@ -0,0 +1,236 @@
+use std::fs::File;
+use std::io::BufWriter;
+use std::sync::atomic::Ordering;
+
+use dump::IndexMetadata;
+use
meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
+use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
+use meilisearch_types::milli::progress::Progress;
+use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
+use meilisearch_types::milli::{self};
+use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
+use time::macros::format_description;
+use time::OffsetDateTime;
+
+use crate::processing::{
+    AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress, VariableNameStep,
+};
+use crate::{Error, IndexScheduler, Result};
+
+impl IndexScheduler {
+    /// Create a dump of the instance for the given dump-creation `task`.
+    ///
+    /// The dump contains, in this order: the API keys carried by the task, the task queue
+    /// (with the content files of the tasks that are still enqueued), every index (its
+    /// documents, with their embeddings injected, and its settings), and the experimental
+    /// features. It is finally persisted to `<dumps_path>/<dump_uid>.dump`, where `dump_uid`
+    /// is derived from the time at which this function started.
+    ///
+    /// The operation returns `Error::AbortedTask` as soon as it notices that
+    /// `must_stop_processing` is set.
+    ///
+    /// ## Return
+    /// The dump task, marked as succeeded and carrying the `dump_uid` in its details.
+    ///
+    // NOTE(review): the return type below reads `Result> {` in this copy of the patch; its
+    // generic arguments look truncated — confirm against the real file.
+    pub(super) fn process_dump_creation(
+        &self,
+        progress: Progress,
+        mut task: Task,
+    ) -> Result> {
+        progress.update_progress(DumpCreationProgress::StartTheDumpCreation);
+        let started_at = OffsetDateTime::now_utc();
+        let (keys, instance_uid) =
+            if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind {
+                (keys, instance_uid)
+            } else {
+                unreachable!();
+            };
+        let dump = dump::DumpWriter::new(*instance_uid)?;
+
+        // 1. dump the keys
+        progress.update_progress(DumpCreationProgress::DumpTheApiKeys);
+        let mut dump_keys = dump.create_keys()?;
+        for key in keys {
+            dump_keys.push_key(key)?;
+        }
+        dump_keys.flush()?;
+
+        let rtxn = self.env.read_txn()?;
+
+        // 2. dump the tasks
+        progress.update_progress(DumpCreationProgress::DumpTheTasks);
+        let mut dump_tasks = dump.create_tasks_queue()?;
+
+        let (atomic, update_task_progress) =
+            AtomicTaskStep::new(self.queue.tasks.all_tasks.len(&rtxn)? as u32);
+        progress.update_progress(update_task_progress);
+
+        for ret in self.queue.tasks.all_tasks.iter(&rtxn)? {
+            if self.scheduler.must_stop_processing.get() {
+                return Err(Error::AbortedTask);
+            }
+
+            let (_, mut t) = ret?;
+            // Keep the original status and content file id: `t` may be altered just below
+            // and is consumed by `push_task`.
+            let status = t.status;
+            let content_file = t.content_uuid();
+
+            // In the case we're dumping ourselves we want to be marked as finished
+            // to not loop over ourselves indefinitely.
+            if t.uid == task.uid {
+                let finished_at = OffsetDateTime::now_utc();
+
+                // We're going to fake the date because we don't know if everything is going to go well.
+                // But we need to dump the task as finished and successful.
+                // If something fails, everything will be set appropriately in the end.
+                t.status = Status::Succeeded;
+                t.started_at = Some(started_at);
+                t.finished_at = Some(finished_at);
+            }
+            let mut dump_content_file = dump_tasks.push_task(&t.into())?;
+
+            // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
+            if let Some(content_file) = content_file {
+                if self.scheduler.must_stop_processing.get() {
+                    return Err(Error::AbortedTask);
+                }
+                if status == Status::Enqueued {
+                    let content_file = self.queue.file_store.get_update(content_file)?;
+
+                    let reader = DocumentsBatchReader::from_reader(content_file)
+                        .map_err(|e| Error::from_milli(e.into(), None))?;
+
+                    let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
+
+                    while let Some(doc) =
+                        cursor.next_document().map_err(|e| Error::from_milli(e.into(), None))?
+                    {
+                        dump_content_file.push_document(
+                            &obkv_to_object(doc, &documents_batch_index)
+                                .map_err(|e| Error::from_milli(e, None))?,
+                        )?;
+                    }
+                    dump_content_file.flush()?;
+                }
+            }
+            atomic.fetch_add(1, Ordering::Relaxed);
+        }
+        dump_tasks.flush()?;
+
+        // 3. Dump the indexes
+        progress.update_progress(DumpCreationProgress::DumpTheIndexes);
+        let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
+        let mut count = 0;
+        let () = self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
+            progress.update_progress(VariableNameStep::new(uid.to_string(), count, nb_indexes));
+            count += 1;
+
+            // From here on `rtxn` is a read transaction on the index itself; it shadows the
+            // scheduler's one inside this closure.
+            let rtxn = index.read_txn()?;
+            let metadata = IndexMetadata {
+                uid: uid.to_owned(),
+                primary_key: index.primary_key(&rtxn)?.map(String::from),
+                created_at: index
+                    .created_at(&rtxn)
+                    .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
+                updated_at: index
+                    .updated_at(&rtxn)
+                    .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
+            };
+            let mut index_dumper = dump.create_index(uid, &metadata)?;
+
+            let fields_ids_map = index.fields_ids_map(&rtxn)?;
+            let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
+            let embedding_configs = index
+                .embedding_configs(&rtxn)
+                .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
+
+            let nb_documents = index
+                .number_of_documents(&rtxn)
+                .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
+                as u32;
+            let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents);
+            progress.update_progress(update_document_progress);
+            let documents = index
+                .all_documents(&rtxn)
+                .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
+            // 3.1. Dump the documents
+            for ret in documents {
+                if self.scheduler.must_stop_processing.get() {
+                    return Err(Error::AbortedTask);
+                }
+
+                let (id, doc) = ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
+
+                let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)
+                    .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
+
+                // Inject the embeddings of the document, if it has any, under the reserved
+                // vectors field.
+                'inject_vectors: {
+                    let embeddings = index
+                        .embeddings(&rtxn, id)
+                        .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
+
+                    if embeddings.is_empty() {
+                        break 'inject_vectors;
+                    }
+
+                    let vectors = document
+                        .entry(RESERVED_VECTORS_FIELD_NAME.to_owned())
+                        .or_insert(serde_json::Value::Object(Default::default()));
+
+                    // The reserved field must be a JSON object; otherwise report a user error
+                    // naming the document by its external id when it can be resolved, or by
+                    // its internal id as a fallback.
+                    let serde_json::Value::Object(vectors) = vectors else {
+                        let user_err =
+                            milli::Error::UserError(milli::UserError::InvalidVectorsMapType {
+                                document_id: {
+                                    if let Ok(Some(Ok(index))) = index
+                                        .external_id_of(&rtxn, std::iter::once(id))
+                                        .map(|it| it.into_iter().next())
+                                    {
+                                        index
+                                    } else {
+                                        format!("internal docid={id}")
+                                    }
+                                },
+                                value: vectors.clone(),
+                            });
+
+                        return Err(Error::from_milli(user_err, Some(uid.to_string())));
+                    };
+
+                    for (embedder_name, embeddings) in embeddings {
+                        // `regenerate` is set unless the embedder's config lists this document
+                        // as having user-provided vectors.
+                        let user_provided = embedding_configs
+                            .iter()
+                            .find(|conf| conf.name == embedder_name)
+                            .is_some_and(|conf| conf.user_provided.contains(id));
+                        let embeddings = ExplicitVectors {
+                            embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
+                                embeddings,
+                            )),
+                            regenerate: !user_provided,
+                        };
+                        vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
+                    }
+                }
+
+                index_dumper.push_document(&document)?;
+                atomic.fetch_add(1, Ordering::Relaxed);
+            }
+
+            // 3.2. Dump the settings
+            let settings = meilisearch_types::settings::settings(
+                index,
+                &rtxn,
+                meilisearch_types::settings::SecretPolicy::RevealSecrets,
+            )
+            .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
+            index_dumper.settings(&settings)?;
+            Ok(())
+        })?;
+
+        // 4. Dump experimental feature settings
+        progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
+        let features = self.features().runtime_features();
+        dump.create_experimental_features(features)?;
+
+        // The dump uid is the start time of the dump, formatted down to the millisecond.
+        let dump_uid = started_at.format(format_description!(
+            "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
+        )).unwrap();
+
+        // Last chance to abort before the dump file is created.
+        if self.scheduler.must_stop_processing.get() {
+            return Err(Error::AbortedTask);
+        }
+        progress.update_progress(DumpCreationProgress::CompressTheDump);
+        let path = self.scheduler.dumps_path.join(format!("{}.dump", dump_uid));
+        let file = File::create(path)?;
+        dump.persist_to(BufWriter::new(file))?;
+
+        // if we reached this step we can tell the scheduler we succeeded to dump ourselves.
+        task.status = Status::Succeeded;
+        task.details = Some(Details::Dump { dump_uid: Some(dump_uid) });
+        Ok(vec![task])
+    }
+}
diff --git a/crates/index-scheduler/src/scheduler/process_index_operation.rs b/crates/index-scheduler/src/scheduler/process_index_operation.rs
new file mode 100644
index 000000000..eff3740a0
--- /dev/null
+++ b/crates/index-scheduler/src/scheduler/process_index_operation.rs
@@ -0,0 +1,527 @@
+use bumpalo::collections::CollectIn;
+use bumpalo::Bump;
+use meilisearch_types::heed::RwTxn;
+use meilisearch_types::milli::documents::PrimaryKey;
+use meilisearch_types::milli::progress::Progress;
+use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
+use meilisearch_types::milli::update::DocumentAdditionResult;
+use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder};
+use meilisearch_types::settings::apply_settings_to_builder;
+use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
+use meilisearch_types::Index;
+use roaring::RoaringBitmap;
+
+use super::create_batch::{DocumentOperation, IndexOperation};
+use crate::processing::{
+    DocumentDeletionProgress, DocumentEditionProgress,
DocumentOperationProgress, SettingsProgress, +}; +use crate::{Error, IndexScheduler, Result}; + +impl IndexScheduler { + /// Process the index operation on the given index. + /// + /// ## Return + /// The list of processed tasks. + #[tracing::instrument( + level = "trace", + skip(self, index_wtxn, index, progress), + target = "indexing::scheduler" + )] + pub(crate) fn apply_index_operation<'i>( + &self, + index_wtxn: &mut RwTxn<'i>, + index: &'i Index, + operation: IndexOperation, + progress: Progress, + ) -> Result> { + let indexer_alloc = Bump::new(); + + let started_processing_at = std::time::Instant::now(); + let must_stop_processing = self.scheduler.must_stop_processing.clone(); + + match operation { + IndexOperation::DocumentClear { index_uid, mut tasks } => { + let count = milli::update::ClearDocuments::new(index_wtxn, index) + .execute() + .map_err(|e| Error::from_milli(e, Some(index_uid)))?; + + let mut first_clear_found = false; + for task in &mut tasks { + task.status = Status::Succeeded; + // The first document clear will effectively delete every documents + // in the database but the next ones will clear 0 documents. + task.details = match &task.kind { + KindWithContent::DocumentClear { .. } => { + let count = if first_clear_found { 0 } else { count }; + first_clear_found = true; + Some(Details::ClearAll { deleted_documents: Some(count) }) + } + otherwise => otherwise.default_details(), + }; + } + + Ok(tasks) + } + IndexOperation::DocumentOperation { + index_uid, + primary_key, + method, + operations, + mut tasks, + } => { + progress.update_progress(DocumentOperationProgress::RetrievingConfig); + // TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches. + // this is made difficult by the fact we're doing private clones of the index scheduler and sending it + // to a fresh thread. 
+ let mut content_files = Vec::new(); + for operation in &operations { + if let DocumentOperation::Add(content_uuid) = operation { + let content_file = self.queue.file_store.get_update(*content_uuid)?; + let mmap = unsafe { memmap2::Mmap::map(&content_file)? }; + content_files.push(mmap); + } + } + + let rtxn = index.read_txn()?; + let db_fields_ids_map = index.fields_ids_map(&rtxn)?; + let mut new_fields_ids_map = db_fields_ids_map.clone(); + + let mut content_files_iter = content_files.iter(); + let mut indexer = indexer::DocumentOperation::new(method); + let embedders = index + .embedding_configs(index_wtxn) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; + let embedders = self.embedders(index_uid.clone(), embedders)?; + for operation in operations { + match operation { + DocumentOperation::Add(_content_uuid) => { + let mmap = content_files_iter.next().unwrap(); + indexer + .add_documents(mmap) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; + } + DocumentOperation::Delete(document_ids) => { + let document_ids: bumpalo::collections::vec::Vec<_> = document_ids + .iter() + .map(|s| &*indexer_alloc.alloc_str(s)) + .collect_in(&indexer_alloc); + indexer.delete_documents(document_ids.into_bump_slice()); + } + } + } + + let local_pool; + let indexer_config = self.index_mapper.indexer_config(); + let pool = match &indexer_config.thread_pool { + Some(pool) => pool, + None => { + local_pool = ThreadPoolNoAbortBuilder::new() + .thread_name(|i| format!("indexing-thread-{i}")) + .build() + .unwrap(); + &local_pool + } + }; + + progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges); + let (document_changes, operation_stats, primary_key) = indexer + .into_changes( + &indexer_alloc, + index, + &rtxn, + primary_key.as_deref(), + &mut new_fields_ids_map, + &|| must_stop_processing.get(), + progress.clone(), + ) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; + + let mut candidates_count = 0; + for (stats, 
task) in operation_stats.into_iter().zip(&mut tasks) { + candidates_count += stats.document_count; + match stats.error { + Some(error) => { + task.status = Status::Failed; + task.error = Some(milli::Error::UserError(error).into()); + } + None => task.status = Status::Succeeded, + } + + task.details = match task.details { + Some(Details::DocumentAdditionOrUpdate { received_documents, .. }) => { + Some(Details::DocumentAdditionOrUpdate { + received_documents, + indexed_documents: Some(stats.document_count), + }) + } + Some(Details::DocumentDeletion { provided_ids, .. }) => { + Some(Details::DocumentDeletion { + provided_ids, + deleted_documents: Some(stats.document_count), + }) + } + _ => { + // In the case of a `documentAdditionOrUpdate` or `DocumentDeletion` + // the details MUST be set to either addition or deletion + unreachable!(); + } + } + } + + progress.update_progress(DocumentOperationProgress::Indexing); + if tasks.iter().any(|res| res.error.is_none()) { + indexer::index( + index_wtxn, + index, + pool, + indexer_config.grenad_parameters(), + &db_fields_ids_map, + new_fields_ids_map, + primary_key, + &document_changes, + embedders, + &|| must_stop_processing.get(), + &progress, + ) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; + + let addition = DocumentAdditionResult { + indexed_documents: candidates_count, + number_of_documents: index + .number_of_documents(index_wtxn) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, + }; + + tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); + } + + Ok(tasks) + } + IndexOperation::DocumentEdition { index_uid, mut task } => { + progress.update_progress(DocumentEditionProgress::RetrievingConfig); + + let (filter, code) = if let KindWithContent::DocumentEdition { + filter_expr, + context: _, + function, + .. 
+ } = &task.kind + { + (filter_expr, function) + } else { + unreachable!() + }; + + let candidates = match filter.as_ref().map(Filter::from_json) { + Some(Ok(Some(filter))) => filter + .evaluate(index_wtxn, index) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, + None | Some(Ok(None)) => index.documents_ids(index_wtxn)?, + Some(Err(e)) => return Err(Error::from_milli(e, Some(index_uid.clone()))), + }; + + let (original_filter, context, function) = if let Some(Details::DocumentEdition { + original_filter, + context, + function, + .. + }) = task.details + { + (original_filter, context, function) + } else { + // In the case of a `documentEdition` the details MUST be set + unreachable!(); + }; + + if candidates.is_empty() { + task.status = Status::Succeeded; + task.details = Some(Details::DocumentEdition { + original_filter, + context, + function, + deleted_documents: Some(0), + edited_documents: Some(0), + }); + + return Ok(vec![task]); + } + + let rtxn = index.read_txn()?; + let db_fields_ids_map = index.fields_ids_map(&rtxn)?; + let mut new_fields_ids_map = db_fields_ids_map.clone(); + // candidates not empty => index not empty => a primary key is set + let primary_key = index.primary_key(&rtxn)?.unwrap(); + + let primary_key = + PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map) + .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?; + + let result_count = Ok((candidates.len(), candidates.len())) as Result<_>; + + if task.error.is_none() { + let local_pool; + let indexer_config = self.index_mapper.indexer_config(); + let pool = match &indexer_config.thread_pool { + Some(pool) => pool, + None => { + local_pool = ThreadPoolNoAbortBuilder::new() + .thread_name(|i| format!("indexing-thread-{i}")) + .build() + .unwrap(); + &local_pool + } + }; + + let candidates_count = candidates.len(); + progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges); + let indexer = UpdateByFunction::new(candidates, 
context.clone(), code.clone()); + let document_changes = pool + .install(|| { + indexer + .into_changes(&primary_key) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone()))) + }) + .unwrap()?; + let embedders = index + .embedding_configs(index_wtxn) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; + let embedders = self.embedders(index_uid.clone(), embedders)?; + + progress.update_progress(DocumentEditionProgress::Indexing); + indexer::index( + index_wtxn, + index, + pool, + indexer_config.grenad_parameters(), + &db_fields_ids_map, + new_fields_ids_map, + None, // cannot change primary key in DocumentEdition + &document_changes, + embedders, + &|| must_stop_processing.get(), + &progress, + ) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; + + let addition = DocumentAdditionResult { + indexed_documents: candidates_count, + number_of_documents: index + .number_of_documents(index_wtxn) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, + }; + + tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); + } + + match result_count { + Ok((deleted_documents, edited_documents)) => { + task.status = Status::Succeeded; + task.details = Some(Details::DocumentEdition { + original_filter, + context, + function, + deleted_documents: Some(deleted_documents), + edited_documents: Some(edited_documents), + }); + } + Err(e) => { + task.status = Status::Failed; + task.details = Some(Details::DocumentEdition { + original_filter, + context, + function, + deleted_documents: Some(0), + edited_documents: Some(0), + }); + task.error = Some(e.into()); + } + } + + Ok(vec![task]) + } + IndexOperation::DocumentDeletion { mut tasks, index_uid } => { + progress.update_progress(DocumentDeletionProgress::RetrievingConfig); + + let mut to_delete = RoaringBitmap::new(); + let external_documents_ids = index.external_documents_ids(); + + for task in tasks.iter_mut() { + let 
before = to_delete.len(); + task.status = Status::Succeeded; + + match &task.kind { + KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => { + for id in documents_ids { + if let Some(id) = external_documents_ids.get(index_wtxn, id)? { + to_delete.insert(id); + } + } + let will_be_removed = to_delete.len() - before; + task.details = Some(Details::DocumentDeletion { + provided_ids: documents_ids.len(), + deleted_documents: Some(will_be_removed), + }); + } + KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } => { + let before = to_delete.len(); + let filter = match Filter::from_json(filter_expr) { + Ok(filter) => filter, + Err(err) => { + // theorically, this should be catched by deserr before reaching the index-scheduler and cannot happens + task.status = Status::Failed; + task.error = Some( + Error::from_milli(err, Some(index_uid.clone())).into(), + ); + None + } + }; + if let Some(filter) = filter { + let candidates = filter + .evaluate(index_wtxn, index) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone()))); + match candidates { + Ok(candidates) => to_delete |= candidates, + Err(err) => { + task.status = Status::Failed; + task.error = Some(err.into()); + } + }; + } + let will_be_removed = to_delete.len() - before; + if let Some(Details::DocumentDeletionByFilter { + original_filter: _, + deleted_documents, + }) = &mut task.details + { + *deleted_documents = Some(will_be_removed); + } else { + // In the case of a `documentDeleteByFilter` the details MUST be set + unreachable!() + } + } + _ => unreachable!(), + } + } + + if to_delete.is_empty() { + return Ok(tasks); + } + + let rtxn = index.read_txn()?; + let db_fields_ids_map = index.fields_ids_map(&rtxn)?; + let mut new_fields_ids_map = db_fields_ids_map.clone(); + + // to_delete not empty => index not empty => primary key set + let primary_key = index.primary_key(&rtxn)?.unwrap(); + + let primary_key = + PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map) + 
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?; + + if !tasks.iter().all(|res| res.error.is_some()) { + let local_pool; + let indexer_config = self.index_mapper.indexer_config(); + let pool = match &indexer_config.thread_pool { + Some(pool) => pool, + None => { + local_pool = ThreadPoolNoAbortBuilder::new() + .thread_name(|i| format!("indexing-thread-{i}")) + .build() + .unwrap(); + &local_pool + } + }; + + progress.update_progress(DocumentDeletionProgress::DeleteDocuments); + let mut indexer = indexer::DocumentDeletion::new(); + let candidates_count = to_delete.len(); + indexer.delete_documents_by_docids(to_delete); + let document_changes = indexer.into_changes(&indexer_alloc, primary_key); + let embedders = index + .embedding_configs(index_wtxn) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; + let embedders = self.embedders(index_uid.clone(), embedders)?; + + progress.update_progress(DocumentDeletionProgress::Indexing); + indexer::index( + index_wtxn, + index, + pool, + indexer_config.grenad_parameters(), + &db_fields_ids_map, + new_fields_ids_map, + None, // document deletion never changes primary key + &document_changes, + embedders, + &|| must_stop_processing.get(), + &progress, + ) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; + + let addition = DocumentAdditionResult { + indexed_documents: candidates_count, + number_of_documents: index + .number_of_documents(index_wtxn) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, + }; + + tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); + } + + Ok(tasks) + } + IndexOperation::Settings { index_uid, settings, mut tasks } => { + progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings); + let indexer_config = self.index_mapper.indexer_config(); + let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config); + + for (task, (_, 
settings)) in tasks.iter_mut().zip(settings) { + let checked_settings = settings.clone().check(); + task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) }); + apply_settings_to_builder(&checked_settings, &mut builder); + + // We can apply the status right now and if an update fail later + // the whole batch will be marked as failed. + task.status = Status::Succeeded; + } + + progress.update_progress(SettingsProgress::ApplyTheSettings); + builder + .execute( + |indexing_step| tracing::debug!(update = ?indexing_step), + || must_stop_processing.get(), + ) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; + + Ok(tasks) + } + IndexOperation::DocumentClearAndSetting { + index_uid, + cleared_tasks, + settings, + settings_tasks, + } => { + let mut import_tasks = self.apply_index_operation( + index_wtxn, + index, + IndexOperation::DocumentClear { + index_uid: index_uid.clone(), + tasks: cleared_tasks, + }, + progress.clone(), + )?; + + let settings_tasks = self.apply_index_operation( + index_wtxn, + index, + IndexOperation::Settings { index_uid, settings, tasks: settings_tasks }, + progress, + )?; + + let mut tasks = settings_tasks; + tasks.append(&mut import_tasks); + Ok(tasks) + } + } + } +} diff --git a/crates/index-scheduler/src/scheduler/process_snapshot_creation.rs b/crates/index-scheduler/src/scheduler/process_snapshot_creation.rs new file mode 100644 index 000000000..c6d6e2dc8 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/process_snapshot_creation.rs @@ -0,0 +1,134 @@ +use std::ffi::OsStr; +use std::fs; +use std::sync::atomic::Ordering; + +use meilisearch_types::heed::CompactionOption; +use meilisearch_types::milli::progress::Progress; +use meilisearch_types::milli::{self}; +use meilisearch_types::tasks::{Status, Task}; +use meilisearch_types::{compression, VERSION_FILE_NAME}; + +use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress, VariableNameStep}; +use crate::{Error, IndexScheduler, Result}; + 
+impl IndexScheduler { + pub(super) fn process_snapshot( + &self, + progress: Progress, + mut tasks: Vec, + ) -> Result> { + progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation); + + fs::create_dir_all(&self.scheduler.snapshots_path)?; + let temp_snapshot_dir = tempfile::tempdir()?; + + // 1. Snapshot the version file. + let dst = temp_snapshot_dir.path().join(VERSION_FILE_NAME); + fs::copy(&self.scheduler.version_file_path, dst)?; + + // 2. Snapshot the index-scheduler LMDB env + // + // When we call copy_to_file, LMDB opens a read transaction by itself, + // we can't provide our own. It is an issue as we would like to know + // the update files to copy but new ones can be enqueued between the copy + // of the env and the new transaction we open to retrieve the enqueued tasks. + // So we prefer opening a new transaction after copying the env and copy more + // update files than not enough. + // + // Note that there cannot be any update files deleted between those + // two read operations as the task processing is synchronous.
+ + // 2.1 First copy the LMDB env of the index-scheduler + progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler); + let dst = temp_snapshot_dir.path().join("tasks"); + fs::create_dir_all(&dst)?; + self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; + + // 2.2 Create a read transaction on the index-scheduler + let rtxn = self.env.read_txn()?; + + // 2.3 Create the update files directory + let update_files_dir = temp_snapshot_dir.path().join("update_files"); + fs::create_dir_all(&update_files_dir)?; + + // 2.4 Only copy the update files of the enqueued tasks + progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles); + let enqueued = self.queue.tasks.get_status(&rtxn, Status::Enqueued)?; + let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32); + progress.update_progress(update_file_progress); + for task_id in enqueued { + let task = + self.queue.tasks.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + if let Some(content_uuid) = task.content_uuid() { + let src = self.queue.file_store.get_update_path(content_uuid); + let dst = update_files_dir.join(content_uuid.to_string()); + fs::copy(src, dst)?; + } + atomic.fetch_add(1, Ordering::Relaxed); + } + + // 3. Snapshot every index + progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes); + let index_mapping = self.index_mapper.index_mapping; + let nb_indexes = index_mapping.len(&rtxn)? as u32; + + for (i, result) in index_mapping.iter(&rtxn)?.enumerate() { + let (name, uuid) = result?; + progress.update_progress(VariableNameStep::new(name, i as u32, nb_indexes)); + let index = self.index_mapper.index(&rtxn, name)?; + let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); + fs::create_dir_all(&dst)?; + index + .copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled) + .map_err(|e| Error::from_milli(e, Some(name.to_string())))?; + } + + drop(rtxn); + + // 4.
Snapshot the auth LMDB env + progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys); + let dst = temp_snapshot_dir.path().join("auth"); + fs::create_dir_all(&dst)?; + // TODO We can't use the open_auth_store_env function here but we should + let auth = unsafe { + milli::heed::EnvOpenOptions::new() + .map_size(1024 * 1024 * 1024) // 1 GiB + .max_dbs(2) + .open(&self.scheduler.auth_path) + }?; + auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; + + // 5. Copy and tarball the flat snapshot + progress.update_progress(SnapshotCreationProgress::CreateTheTarball); + // 5.1 Find the original name of the database + // TODO find a better way to get this path + let mut base_path = self.env.path().to_owned(); + base_path.pop(); + let db_name = base_path.file_name().and_then(OsStr::to_str).unwrap_or("data.ms"); + + // 5.2 Tarball the content of the snapshot in a tempfile with a .snapshot extension + let snapshot_path = self.scheduler.snapshots_path.join(format!("{}.snapshot", db_name)); + let temp_snapshot_file = tempfile::NamedTempFile::new_in(&self.scheduler.snapshots_path)?; + compression::to_tar_gz(temp_snapshot_dir.path(), temp_snapshot_file.path())?; + let file = temp_snapshot_file.persist(snapshot_path)?; + + // 5.3 Change the permission to make the snapshot readonly + let mut permissions = file.metadata()?.permissions(); + permissions.set_readonly(true); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + #[allow(clippy::non_octal_unix_permissions)] + // r--r--r-- + permissions.set_mode(0b100100100); + } + + file.set_permissions(permissions)?; + + for task in &mut tasks { + task.status = Status::Succeeded; + } + + Ok(tasks) + } +} diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-2.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update-2.snap similarity index 77% rename from
crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-2.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update-2.snap index 2b76f46a6..01a8429c4 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-2.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update-2.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs expression: task.details +snapshot_kind: text --- { "embedders": { diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update-5.snap similarity index 78% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update-5.snap index 061de75a5..7b576aa24 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update-5.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs expression: config.embedder_options +snapshot_kind: text --- { "Rest": { diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update.snap similarity index 77% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update.snap index 2b76f46a6..01a8429c4 100644 --- 
a/crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs expression: task.details +snapshot_kind: text --- { "embedders": { diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-15.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-15.snap similarity index 63% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-15.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-15.snap index 540835dfb..ece33e3b4 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-15.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-15.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs expression: doc +snapshot_kind: text --- { "doggo": "Intel", diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-2.snap similarity index 86% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-2.snap index 629ea87dc..025ea4a5e 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-2.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: 
crates/index-scheduler/src/scheduler/test_embedders.rs expression: task.details +snapshot_kind: text --- { "embedders": { diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-22.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-22.snap similarity index 62% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-22.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-22.snap index bc35d84f6..49c5403d4 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-22.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-22.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs expression: doc +snapshot_kind: text --- { "doggo": "kefir", diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-5.snap similarity index 76% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-5.snap index c08aa8116..14fcb3ee9 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-5.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs expression: fakerest_config.embedder_options +snapshot_kind: text --- { "Rest": { diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-8.snap 
b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-8.snap similarity index 72% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-8.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-8.snap index 712a62c77..76828ad7a 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-8.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-8.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs expression: simple_hf_config.embedder_options +snapshot_kind: text --- { "HuggingFace": { diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors.snap similarity index 86% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors.snap index 629ea87dc..025ea4a5e 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs expression: task.details +snapshot_kind: text --- { "embedders": { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/cancel_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap 
rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/cancel_processed.snap index f0c382d86..e3da7bd06 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/cancel_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/initial_tasks_enqueued.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/initial_tasks_enqueued.snap index b895bbc7c..51fb88025 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/initial_tasks_enqueued.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/aborted_indexation.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/aborted_indexation.snap index b73714e36..408980c05 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/aborted_indexation.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/cancel_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/cancel_processed.snap index 444b171dd..2a9de78ab 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/cancel_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/first_task_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/first_task_processed.snap index 17265263c..e85755e98 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/first_task_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap index c24c36313..957df00ea 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/after_dump_register.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/after_dump_register.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/after_dump_register.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/after_dump_register.snap index 8821af805..a7c5e5c09 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/after_dump_register.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/after_dump_register.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_processed.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_processed.snap index dbae3a082..426a649c8 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_registered.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_registered.snap index b9f33e598..fe128e3d3 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_registered.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/aborted_indexation.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/aborted_indexation.snap index 0b9a0d709..26ded73d7 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/aborted_indexation.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_processed.snap index ef6845b05..9a0d8cc88 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_task_registered.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_task_registered.snap index fef6c20f6..6ac809600 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_task_registered.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/initial_task_processing.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/initial_task_processing.snap index 3f45be007..72f0c39eb 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/initial_task_processing.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/registered_the_first_task.snap index 087257e18..63919487a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: 
crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/cancel_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/cancel_processed.snap index de94da936..56dea5b08 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/cancel_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/initial_task_processed.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/initial_task_processed.snap index 78b62979b..9d83368b1 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/initial_task_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/registered_the_first_task.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/registered_the_first_task.snap index 087257e18..63919487a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap index 3fe1a7d01..58b2a831d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/before_index_creation.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/before_index_creation.snap similarity index 
98% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/before_index_creation.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/before_index_creation.snap index 0234a5057..6dc95e1d1 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/before_index_creation.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/before_index_creation.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/both_task_succeeded.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/both_task_succeeded.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/both_task_succeeded.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/both_task_succeeded.snap index 8203e81f4..7de83b538 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/both_task_succeeded.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/both_task_succeeded.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_first_task.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_first_task.snap index 230b5e195..92f24508c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_second_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_second_task.snap index 9b22afff0..5f25c2964 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_second_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_third_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_third_task.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_third_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_third_task.snap index 914660746..0006ee8c0 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_third_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_third_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap index c252d35c9..d7e2f3b07 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap similarity index 98% rename from 
crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap index 830afd854..83604f393 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap index 9d3f29c48..11ec76348 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap index e3627bbd3..cf2b2b691 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap index 322bcf4ab..e1e36fffc 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap index aa047e3ff..0b16ffadd 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap index 8d499b59c..4e5651deb 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap index 423dfb37c..5b829d27e 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap index e5878246d..d4113041a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled 
= true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap index 230b5e195..92f24508c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap index a0148db63..21a6a59f7 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: 
text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap index bee90a73b..adf9a76fe 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/first.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/first.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/first.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/first.snap index ac18e924d..809273a20 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/first.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/first.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/fourth.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/fourth.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/fourth.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/fourth.snap index 06e63e00e..a871c2baa 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/fourth.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/fourth.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_first_task.snap index 33cea7854..5c8082f72 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap index ebd130966..a22004697 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_second_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_second_task.snap index c53aec0c9..635491dc1 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_second_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_third_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_third_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_third_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_third_task.snap index 7679999ce..1d190baca 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_third_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_third_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/second.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/second.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/second.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/second.snap index 632b7a54a..208aa100b 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/second.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/second.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/third.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/third.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/third.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/third.snap index 3a2963654..8977e4cf0 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/third.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/third.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_a.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_a.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_a.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_a.snap index b1c6fde36..dc73ddb0d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_a.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_a.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_b.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_b.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_b.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_b.snap index 065023214..25827aa96 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_b.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_b.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_c.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_c.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_c.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_c.snap index 03b09b928..7b1ad4b9b 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_c.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_c.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_d.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_d.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_d.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_d.snap index 08ecfddc2..aa4b71d67 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_d.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_d.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_processed.snap similarity index 98% rename from 
crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_processed.snap index bca858559..68934003d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_registered.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_registered.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_registered.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_registered.snap index 234915267..2296dc9f2 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_registered.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_registered.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/second_swap_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/second_swap_processed.snap index 7b5ab6e4b..abc2f2954 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/second_swap_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/third_empty_swap_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/third_empty_swap_processed.snap index 77b1193a5..f75caa10c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/third_empty_swap_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/two_swaps_registered.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/two_swaps_registered.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/two_swaps_registered.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/two_swaps_registered.snap index ccab86904..cb5fd822d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/two_swaps_registered.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/two_swaps_registered.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/after_the_index_creation.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/after_the_index_creation.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/after_the_index_creation.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/after_the_index_creation.snap index 08ecfddc2..aa4b71d67 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/after_the_index_creation.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/after_the_index_creation.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/first_swap_failed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/first_swap_failed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/first_swap_failed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/first_swap_failed.snap index e8e74d0e3..f7eb4e1e7 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/first_swap_failed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/first_swap_failed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/initial_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/initial_tasks_processed.snap similarity index 98% rename from 
crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/initial_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/initial_tasks_processed.snap index 08ecfddc2..aa4b71d67 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/initial_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/initial_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap index 0d51e242c..2c33bd04a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap index f63c498a5..83c43339f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap index 95d615b1e..dd3ed4c8a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap index 6402982ee..3c4b35d9f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/initial_tasks_enqueued.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/initial_tasks_enqueued.snap index 0d51e242c..2c33bd04a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/initial_tasks_enqueued.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/initial_tasks_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/initial_tasks_processed.snap 
index f63c498a5..83c43339f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/initial_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/task_deletion_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/task_deletion_processed.snap index 3f4ae56d8..9512a8d8d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/task_deletion_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap index aed0a818a..46cbaefc2 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_done.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_done.snap index ae910d44b..b35bcdf1b 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_done.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_enqueued.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_enqueued.snap index 746caa1de..a861fea12 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_enqueued.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: 
crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_processing.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_processing.snap index 85a0afc46..b3500b8a5 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_processing.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap index e2668fcea..92e37550a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap index 7f08c0575..bdd654672 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_task_is_processing/registered_a_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_task_is_processing/registered_a_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/test_task_is_processing/registered_a_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/test_task_is_processing/registered_a_task.snap index e3627bbd3..cf2b2b691 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_task_is_processing/registered_a_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_task_is_processing/registered_a_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_register.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/after_register.snap similarity index 96% rename from 
crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_register.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/after_register.snap index d8a689669..42df87d17 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_register.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/after_register.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/after_the_batch_creation.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/after_the_batch_creation.snap index 8beb49145..a3d3deade 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/after_the_batch_creation.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition/once_everything_is_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/once_everything_is_processed.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition/once_everything_is_processed.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/once_everything_is_processed.snap index 2357a404f..83e23e8b0 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition/once_everything_is_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/once_everything_is_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/after_processing_the_batch.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/after_processing_the_batch.snap index 1fce684f5..2bc94c1f9 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/after_processing_the_batch.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/documents.snap new file mode 100644 index 000000000..bef7fca61 --- /dev/null +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/documents.snap @@ -0,0 +1,10 @@ +--- +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text +--- +[ + { + "id": 3, + "doggo": "bork" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/registered_the_first_task.snap similarity index 96% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/registered_the_first_task.snap index b1337b287..c1629dc02 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/registered_the_second_task.snap index 60e2d22be..c8c117b2a 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap index ee42e932a..825d74562 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_last_successful_addition.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_last_successful_addition.snap index 0e9e47574..4ffdf8958 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_last_successful_addition.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/documents.snap similarity index 60% rename from crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/documents.snap index 8204d059b..3619a50b5 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/registered_the_first_task.snap similarity index 96% rename from 
crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/registered_the_first_task.snap index 2e96a4614..6faba461a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/registered_the_second_task.snap index d4f8b47b9..257c66390 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap index 5efac0653..2c29d9da7 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap index cdc1f98b7..ce66dc4d1 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/documents.snap similarity index 82% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/documents.snap index 5a839838d..bf6495c9c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap index 
24ace66bf..68f4b6701 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap similarity index 96% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap index 230b5e195..03d4e5b16 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap 
similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap index f937c6805..9c3711061 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap index 28a2a65a5..ed9d02ae2 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap @@ 
-1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap similarity index 82% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap index 5a839838d..bf6495c9c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap index 519646fcb..343c1f77d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap index f842a275e..9aa284128 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff 
--git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap similarity index 96% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap index 33cea7854..6f0f9c782 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap index aa27500a7..d87a73a81 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap index 8b463b588..2fbcc3dc6 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap similarity index 98% rename from 
crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap index 537980795..e8f8d85d3 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap index e342fb2f3..a5e55b95f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap @@ -1,5 
+1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap index 9531dd0bf..231352493 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap index 3cdee6f23..aa6956c5f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap index a3609fb1b..e4b176513 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap similarity index 81% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap index cbd8d175a..ae77cfa9d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap index d73d749f6..4b737c1e6 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap index 00c911dae..79c0071ff 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap similarity index 98% rename from 
crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap index 99922b9a0..95466395e 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/documents.snap similarity index 82% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/documents.snap index 5a839838d..bf6495c9c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap index 24ace66bf..68f4b6701 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap similarity index 96% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap index 230b5e195..03d4e5b16 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap index e8ee841ae..9878d5283 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/documents.snap similarity index 53% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/documents.snap index dd1bbf8b0..d8f74d472 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: 
crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap index 1713c0ac2..bd87c1981 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap index 96e83ac9b..ac50daec7 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap index f54713081..7785e0cb0 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap index 0f24a6715..73c8fcf17 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap index a3a481855..86f301ba8 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/documents.snap new file mode 100644 index 000000000..5022049f1 --- /dev/null +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/documents.snap @@ -0,0 +1,10 @@ +--- +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text +--- +[ + { + "id": 0, + "doggo": "jean bob" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap index f12ac555b..4d5028d60 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap 
index b49d3ea64..4350f68ae 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap index 35783d84f..226a1d509 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap index 3eb5c7a4d..c744a7f18 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap new file mode 100644 index 000000000..5022049f1 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap @@ -0,0 +1,10 @@ +--- +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text +--- +[ + { + "id": 0, + "doggo": "jean bob" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap index d01799fc2..86ab07c3c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap index 2c6d29a18..889d65ff9 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: 
crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap index 3306009aa..4111cb60e 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap index 6d3fabe77..7ef550fd8 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/documents.snap similarity index 68% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/documents.snap index a73c52da5..0a8f2e4e8 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap index 5b304aa24..8bda924d3 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap index b5e113599..f153e2d44 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap index 0f3730932..cdd51b199 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap index 2876c7681..f18858451 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap index 46408e0a7..2fb5363e8 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap similarity index 74% rename from 
crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap index 9c79853fa..1ff106b5b 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap index 4acc5342b..6a7d3617a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap index 828c28298..7a98a0e37 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap index 7e9a02288..7603decab 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/1.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/1.snap index a5fbc024c..85b137b45 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/1.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/2.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/2.snap index d2cfc793b..8bd563e6e 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/2.snap @@ -1,5 +1,5 @@ --- -source: 
crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/documents.snap similarity index 82% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/documents.snap index 5a839838d..bf6495c9c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap index ba16d64f1..122136e08 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap 
@@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/all_tasks_processed.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/all_tasks_processed.snap index 5c6c711a0..7ecc0e7a9 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/all_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/documents.snap new file mode 100644 index 000000000..bf6495c9c --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/documents.snap @@ -0,0 +1,46 @@ +--- +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + 
}, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/five_tasks_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/five_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/five_tasks_processed.snap index e03da1332..7b3a9db02 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/five_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/five_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/1.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/1.snap index 5444d0a3e..bb4fb66df 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/1.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: 
crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/2.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/2.snap index 9e742df7b..0911eb631 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/2.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/documents.snap new file mode 100644 index 000000000..bf6495c9c --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/documents.snap @@ -0,0 +1,46 @@ +--- +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap index 35368e4b3..916b44f96 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/all_tasks_processed.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/all_tasks_processed.snap index ef6e2d0e1..4b005f38e 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/all_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### 
Autobatching Enabled = false diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/documents.snap new file mode 100644 index 000000000..bf6495c9c --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/documents.snap @@ -0,0 +1,46 @@ +--- +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/five_tasks_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/five_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/five_tasks_processed.snap index bfc2e9f42..bf73f9593 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/five_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/five_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: 
crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap index 773f43c2c..7f08b5d0d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/all_tasks_processed.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/all_tasks_processed.snap index a4649c1eb..ff617008c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/all_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs 
+source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/documents.snap new file mode 100644 index 000000000..bf6495c9c --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/documents.snap @@ -0,0 +1,46 @@ +--- +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/five_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/five_tasks_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/five_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/five_tasks_processed.snap index 8aba4bd5c..ff492e75e 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/five_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/five_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs 
snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap index f581defa8..7de6af6b7 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap index 27522376f..68872a141 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap index 28504ffea..1732eee6b 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap index 288f2bc88..3777a7bc8 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap similarity index 98% rename from 
crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap index ff63c0caf..33bd5c0d2 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap index 77367f06b..e5baae150 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors_first_and_embedder_later/documents after initial push.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors_first_and_embedder_later/documents after initial push.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/import_vectors_first_and_embedder_later/documents after initial push.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors_first_and_embedder_later/documents after initial push.snap index e06d09464..3eaf58e17 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors_first_and_embedder_later/documents after initial push.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors_first_and_embedder_later/documents after initial push.snap @@ -1,4 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs +snapshot_kind: text --- [{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"my_doggo_embedder":[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1],"unknown 
embedder":[1,2,3]}},{"id":2,"doggo":"max","_vectors":{"my_doggo_embedder":{"regenerate":false,"embeddings":[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]},"unknown embedder":[4,5]}},{"id":3,"doggo":"marcel","_vectors":{"my_doggo_embedder":{"regenerate":true,"embeddings":[3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]}}},{"id":4,"doggo":"sora","_vectors":{"my_doggo_embedder":{"regenerate":true,"embeddings":null}}}] diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap index 8beb49145..fcbaaace3 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap index 875ae06c6..5b38f28b5 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap @@ -1,5 +1,5 @@ --- -source: 
crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap index d8a689669..c5b21621f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap index bda90680f..1b9018726 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap index be79abf21..5bbc89c44 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap rename 
to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap index 0ee4d91e5..7149d5f97 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap new file mode 100644 index 000000000..18071608b --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap @@ -0,0 +1,10 @@ +--- +source: crates/index-scheduler/src/scheduler/test_failure.rs +snapshot_kind: text +--- +[ + { + "id": 3, + "doggo": "bork" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap index 43be57779..b13a63738 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap index ca1866473..9e10d3052 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/after_register.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/after_register.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/after_register.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/after_register.snap index 5129662eb..4ece15b13 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/after_register.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/after_register.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap index b24d0be1e..24589fc66 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap index 8ab4d84dd..f698eff0a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap index 8ab4d84dd..f698eff0a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap index d8a689669..c5b21621f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap index d8a689669..c5b21621f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap index 2357a404f..1a678e46b 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap index c776baab7..3f3a6f769 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap index 5129662eb..4ece15b13 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/scheduler/test.rs b/crates/index-scheduler/src/scheduler/test.rs new file mode 100644 index 000000000..de12cb25d --- /dev/null +++ b/crates/index-scheduler/src/scheduler/test.rs @@ -0,0 +1,921 @@ +use std::collections::BTreeMap; + +use big_s::S; +use meili_snap::{json_string, snapshot}; +use meilisearch_auth::AuthFilter; +use meilisearch_types::milli::index::IndexEmbeddingConfig; +use meilisearch_types::milli::update::IndexDocumentsMethod::*; +use meilisearch_types::milli::{self}; +use meilisearch_types::settings::SettingEmbeddingSettings; +use meilisearch_types::tasks::{IndexSwap, KindWithContent}; +use roaring::RoaringBitmap; + +use crate::insta_snapshot::snapshot_index_scheduler; +use crate::test_utils::Breakpoint::*; +use crate::test_utils::{ + index_creation_task, read_json, replace_document_import_task, sample_documents, +}; +use crate::IndexScheduler; + +#[test] +fn insert_task_while_another_task_is_processing() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + handle.advance_till([Start, BatchCreated]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_creation"); + + // while the task is processing can we register another task? 
+ index_scheduler.register(index_creation_task("index_b", "id"), None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }, None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); +} + +#[test] +fn test_task_is_processing() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_a_task"); + + handle.advance_till([Start, BatchCreated]); + assert!(index_scheduler.is_task_processing().unwrap()); +} + +/// We send a lot of tasks but notify the tasks scheduler only once as +/// we send them very fast, we must make sure that they are all processed. +#[test] +fn process_tasks_inserted_without_new_signal() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("cattos"), primary_key: None }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), 
name: "processed_the_second_task"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_third_task"); +} + +#[test] +fn process_tasks_without_autobatching() { + let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); + + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_fourth_task"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "fourth"); +} + +#[test] +fn task_deletion_undeleteable() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file0.persist().unwrap(); + file1.persist().unwrap(); + + let to_enqueue = [ + 
index_creation_task("catto", "mouse"), + replace_document_import_task("catto", None, 0, documents_count0), + replace_document_import_task("doggo", Some("bone"), 1, documents_count1), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + } + + // here we have registered all the tasks, but the index scheduler + // has not progressed at all + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + + index_scheduler + .register( + KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0, 1]), + }, + None, + false, + ) + .unwrap(); + // again, no progress made at all, but one more task is registered + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_enqueued"); + + // now we create the first batch + handle.advance_till([Start, BatchCreated]); + + // the task deletion should now be "processing" + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processing"); + + handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); + // after the task deletion is processed, no task should actually have been deleted, + // because the tasks with ids 0 and 1 were still "enqueued", and thus undeleteable + // the "task deletion" task should be marked as "succeeded" and, in its details, the + // number of deleted tasks should be 0 + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_done"); +} + +#[test] +fn task_deletion_deleteable() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file0.persist().unwrap(); + file1.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + 
replace_document_import_task("doggo", Some("bone"), 1, documents_count1), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + + handle.advance_one_successful_batch(); + // first addition of documents should be successful + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed"); + + // Now we delete the first task + index_scheduler + .register( + KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_task_deletion"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed"); +} + +#[test] +fn task_deletion_delete_same_task_twice() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file0.persist().unwrap(); + file1.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + replace_document_import_task("doggo", Some("bone"), 1, documents_count1), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + + handle.advance_one_successful_batch(); + // first addition of documents should be successful + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed"); + + // Now we delete the first task multiple times in a row + for _ in 0..2 { + index_scheduler + .register( + 
KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + handle.advance_one_successful_batch(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed"); +} + +#[test] +fn document_addition_and_index_deletion() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); + + handle.advance_one_successful_batch(); // The index creation. + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "before_index_creation"); + handle.advance_one_successful_batch(); // // after the execution of the two tasks in a single batch. 
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "both_task_succeeded"); +} + +#[test] +fn do_not_batch_task_of_different_indexes() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + let index_names = ["doggos", "cattos", "girafos"]; + + for name in index_names { + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: name.to_string(), primary_key: None }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + for name in index_names { + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: name.to_string() }, None, false) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + for _ in 0..(index_names.len() * 2) { + handle.advance_one_successful_batch(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); +} + +#[test] +fn swap_indexes() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let to_enqueue = [ + index_creation_task("a", "id"), + index_creation_task("b", "id"), + index_creation_task("c", "id"), + index_creation_task("d", "id"), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + } + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_a"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_b"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_c"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_d"); + + index_scheduler + .register( + KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: 
("c".to_owned(), "d".to_owned()) }, + ], + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_registered"); + index_scheduler + .register( + KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }], + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "two_swaps_registered"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_processed"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_swap_processed"); + + index_scheduler.register(KindWithContent::IndexSwap { swaps: vec![] }, None, false).unwrap(); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed"); +} + +#[test] +fn swap_indexes_errors() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let to_enqueue = [ + index_creation_task("a", "id"), + index_creation_task("b", "id"), + index_creation_task("c", "id"), + index_creation_task("d", "id"), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + } + handle.advance_n_successful_batches(4); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_the_index_creation"); + + let first_snap = snapshot_index_scheduler(&index_scheduler); + snapshot!(first_snap, name: "initial_tasks_processed"); + + let err = index_scheduler + .register( + KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("b".to_owned(), "a".to_owned()) }, + ], + }, + None, + false, + ) + .unwrap_err(); + snapshot!(format!("{err}"), @"Indexes must be declared only once during a swap. 
`a`, `b` were specified several times."); + + let second_snap = snapshot_index_scheduler(&index_scheduler); + assert_eq!(first_snap, second_snap); + + // Index `e` does not exist, but we don't check its existence yet + index_scheduler + .register( + KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("c".to_owned(), "e".to_owned()) }, + IndexSwap { indexes: ("d".to_owned(), "f".to_owned()) }, + ], + }, + None, + false, + ) + .unwrap(); + handle.advance_one_failed_batch(); + // Now the first swap should have an error message saying `e` and `f` do not exist + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_failed"); +} + +#[test] +fn document_addition_and_index_deletion_on_unexisting_index() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) + .unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + handle.advance_n_successful_batches(1); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); +} + +#[test] +fn cancel_enqueued_task() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + 
KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + ]; + for task in to_enqueue { + let _ = index_scheduler.register(task, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); +} + +#[test] +fn cancel_succeeded_task() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + + let _ = index_scheduler + .register(replace_document_import_task("catto", None, 0, documents_count0), None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processed"); + + index_scheduler + .register( + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + false, + ) + .unwrap(); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); +} + +#[test] +fn cancel_processing_task() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + + let _ = index_scheduler + .register(replace_document_import_task("catto", None, 0, documents_count0), None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + handle.advance_till([Start, BatchCreated, InsideProcessBatch]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processing"); + + index_scheduler + 
.register( + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + false, + ) + .unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_task_registered"); + // Now we check that we can reach the AbortedIndexation error handling + handle.advance_till([AbortedIndexation]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "aborted_indexation"); + + // handle.advance_till([Start, BatchCreated, BeforeProcessing, AfterProcessing]); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); +} + +#[test] +fn cancel_mix_of_tasks() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file1.persist().unwrap(); + let (file2, documents_count2) = sample_documents(&index_scheduler, 2, 2); + file2.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + replace_document_import_task("beavero", None, 1, documents_count1), + replace_document_import_task("wolfo", None, 2, documents_count2), + ]; + for task in to_enqueue { + let _ = index_scheduler.register(task, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + } + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_task_processed"); + + handle.advance_till([Start, BatchCreated, InsideProcessBatch]); + index_scheduler + .register( + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0, 1, 2]), + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processing_second_task_cancel_enqueued"); + + 
handle.advance_till([AbortedIndexation]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "aborted_indexation"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); +} + +#[test] +fn test_settings_update() { + use meilisearch_types::settings::{Settings, Unchecked}; + use milli::update::Setting; + + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let mut new_settings: Box> = Box::default(); + let mut embedders = BTreeMap::default(); + let embedding_settings = milli::vector::settings::EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::Rest), + api_key: Setting::Set(S("My super secret")), + url: Setting::Set(S("http://localhost:7777")), + dimensions: Setting::Set(4), + request: Setting::Set(serde_json::json!("{{text}}")), + response: Setting::Set(serde_json::json!("{{embedding}}")), + ..Default::default() + }; + embedders + .insert(S("default"), SettingEmbeddingSettings { inner: Setting::Set(embedding_settings) }); + new_settings.embedders = Setting::Set(embedders); + + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings, + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task"); + + { + let rtxn = index_scheduler.read_txn().unwrap(); + let task = index_scheduler.queue.tasks.get_task(&rtxn, 0).unwrap().unwrap(); + let task = meilisearch_types::task_view::TaskView::from_task(&task); + insta::assert_json_snapshot!(task.details); + } + + handle.advance_n_successful_batches(1); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed"); + + { + let rtxn = index_scheduler.read_txn().unwrap(); + let task = index_scheduler.queue.tasks.get_task(&rtxn, 
0).unwrap().unwrap(); + let task = meilisearch_types::task_view::TaskView::from_task(&task); + insta::assert_json_snapshot!(task.details); + } + + // has everything being pushed successfully in milli? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + + let configs = index.embedding_configs(&rtxn).unwrap(); + let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap(); + insta::assert_snapshot!(name, @"default"); + insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); + insta::assert_json_snapshot!(config.embedder_options); +} + +#[test] +fn simple_new() { + crate::IndexScheduler::test(true, vec![]); +} + +#[test] +fn basic_get_stats() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let kind = index_creation_task("catto", "mouse"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("whalo", "fish"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + + snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" + { + "indexes": { + "catto": 1, + "doggo": 1, + "whalo": 1 + }, + "statuses": { + "canceled": 0, + "enqueued": 3, + "failed": 0, + "processing": 0, + "succeeded": 0 + }, + "types": { + "documentAdditionOrUpdate": 0, + "documentDeletion": 0, + "documentEdition": 0, + "dumpCreation": 0, + "indexCreation": 3, + "indexDeletion": 0, + "indexSwap": 0, + "indexUpdate": 0, + "settingsUpdate": 0, + "snapshotCreation": 0, + "taskCancelation": 0, + "taskDeletion": 0 + } + } + "###); + + handle.advance_till([Start, BatchCreated]); + snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" + { + "indexes": { + "catto": 1, + "doggo": 1, + "whalo": 1 + }, + "statuses": { + "canceled": 0, + "enqueued": 2, + "failed": 0, + "processing": 1, + "succeeded": 
0 + }, + "types": { + "documentAdditionOrUpdate": 0, + "documentDeletion": 0, + "documentEdition": 0, + "dumpCreation": 0, + "indexCreation": 3, + "indexDeletion": 0, + "indexSwap": 0, + "indexUpdate": 0, + "settingsUpdate": 0, + "snapshotCreation": 0, + "taskCancelation": 0, + "taskDeletion": 0 + } + } + "###); + + handle.advance_till([ + InsideProcessBatch, + InsideProcessBatch, + ProcessBatchSucceeded, + AfterProcessing, + Start, + BatchCreated, + ]); + snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" + { + "indexes": { + "catto": 1, + "doggo": 1, + "whalo": 1 + }, + "statuses": { + "canceled": 0, + "enqueued": 1, + "failed": 0, + "processing": 1, + "succeeded": 1 + }, + "types": { + "documentAdditionOrUpdate": 0, + "documentDeletion": 0, + "documentEdition": 0, + "dumpCreation": 0, + "indexCreation": 3, + "indexDeletion": 0, + "indexSwap": 0, + "indexUpdate": 0, + "settingsUpdate": 0, + "snapshotCreation": 0, + "taskCancelation": 0, + "taskDeletion": 0 + } + } + "###); + + // now we make one more batch, the started_at field of the new tasks will be past `second_start_time` + handle.advance_till([ + InsideProcessBatch, + InsideProcessBatch, + ProcessBatchSucceeded, + AfterProcessing, + Start, + BatchCreated, + ]); + snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" + { + "indexes": { + "catto": 1, + "doggo": 1, + "whalo": 1 + }, + "statuses": { + "canceled": 0, + "enqueued": 0, + "failed": 0, + "processing": 1, + "succeeded": 2 + }, + "types": { + "documentAdditionOrUpdate": 0, + "documentDeletion": 0, + "documentEdition": 0, + "dumpCreation": 0, + "indexCreation": 3, + "indexDeletion": 0, + "indexSwap": 0, + "indexUpdate": 0, + "settingsUpdate": 0, + "snapshotCreation": 0, + "taskCancelation": 0, + "taskDeletion": 0 + } + } + "###); +} + +#[test] +fn cancel_processing_dump() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), 
instance_uid: None }; + let dump_cancellation = KindWithContent::TaskCancelation { + query: "cancel dump".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }; + let _ = index_scheduler.register(dump_creation, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register"); + handle.advance_till([Start, BatchCreated, InsideProcessBatch]); + + let _ = index_scheduler.register(dump_cancellation, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered"); + + snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); +} + +#[test] +fn create_and_list_index() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let index_creation = + KindWithContent::IndexCreation { index_uid: S("kefir"), primary_key: None }; + let _ = index_scheduler.register(index_creation, None, false).unwrap(); + handle.advance_till([Start, BatchCreated, InsideProcessBatch]); + // The index creation has not been started, the index should not exists + + let err = index_scheduler.index("kefir").map(|_| ()).unwrap_err(); + snapshot!(err, @"Index `kefir` not found."); + let empty = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap(); + snapshot!(format!("{empty:?}"), @"(0, [])"); + + // After advancing just once the index should've been created, the wtxn has been released and commited + // but the indexUpdate task has not been processed yet + handle.advance_till([InsideProcessBatch]); + + index_scheduler.index("kefir").unwrap(); + let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap(); + snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]" }), @r#" + [ + 1, + [ + [ + "kefir", + { + "number_of_documents": 0, + "database_size": 24576, 
+ "used_database_size": 8192, + "primary_key": null, + "field_distribution": {}, + "created_at": "[date]", + "updated_at": "[date]" + } + ] + ] + ] + "#); +} diff --git a/crates/index-scheduler/src/scheduler/test_document_addition.rs b/crates/index-scheduler/src/scheduler/test_document_addition.rs new file mode 100644 index 000000000..96181cbaa --- /dev/null +++ b/crates/index-scheduler/src/scheduler/test_document_addition.rs @@ -0,0 +1,1169 @@ +use big_s::S; +use meili_snap::snapshot; +use meilisearch_types::milli::obkv_to_json; +use meilisearch_types::milli::update::IndexDocumentsMethod::*; +use meilisearch_types::tasks::KindWithContent; + +use crate::insta_snapshot::snapshot_index_scheduler; +use crate::test_utils::read_json; +use crate::test_utils::Breakpoint::*; +use crate::IndexScheduler; + +#[test] +fn document_addition() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); + + handle.advance_till([Start, BatchCreated]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_the_batch_creation"); + + handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "once_everything_is_processed"); +} + +#[test] +fn document_addition_and_document_deletion() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let content = 
r#"[ + { "id": 1, "doggo": "jean bob" }, + { "id": 2, "catto": "jorts" }, + { "id": 3, "doggo": "bork" } + ]"#; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + index_scheduler + .register( + KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1"), S("2")], + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + handle.advance_one_successful_batch(); // The addition AND deletion should've been batched together + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_batch"); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn document_deletion_and_document_addition() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + index_scheduler + .register( + KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1"), S("2")], + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + let content = 
r#"[ + { "id": 1, "doggo": "jean bob" }, + { "id": 2, "catto": "jorts" }, + { "id": 3, "doggo": "bork" } + ]"#; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + // The deletion should have failed because it can't create an index + handle.advance_one_failed_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_the_deletion"); + + // The addition should works + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_last_successful_addition"); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_replace() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: 
S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // everything should be batched together. + handle.advance_n_successful_batches(1); + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // has everything being pushed successfully in milli? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_update() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // everything should be batched together. + handle.advance_n_successful_batches(1); + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // has everything being pushed successfully in milli? 
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_mixed_document_addition() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let method = if i % 2 == 0 { UpdateDocuments } else { ReplaceDocuments }; + + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // Only half of the task should've been processed since we can't autobatch replace and update together. + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); + + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + + // has everything being pushed successfully in milli? 
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_replace_without_autobatching() { + let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // Nothing should be batched thus half of the tasks are processed. + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); + + // Everything is processed. + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + + // has everything being pushed successfully in milli? 
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_update_without_autobatching() { + let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // Nothing should be batched thus half of the tasks are processed. + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); + + // Everything is processed. + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + + // has everything being pushed successfully in milli? 
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_cant_create_index_without_index() { + // We're going to autobatch multiple document addition that don't have + // the right to create an index while there is no index currently. + // Thus, everything should be batched together and a IndexDoesNotExists + // error should be throwed. + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // Everything should be batched together. + handle.advance_till([ + Start, + BatchCreated, + InsideProcessBatch, + ProcessBatchFailed, + AfterProcessing, + ]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_10_tasks"); + + // The index should not exist. 
+ snapshot!(matches!(index_scheduler.index_exists("doggos"), Ok(true)), @"false"); +} + +#[test] +fn test_document_addition_cant_create_index_without_index_without_autobatching() { + // We're going to execute multiple document addition that don't have + // the right to create an index while there is no index currently. + // Since the auto-batching is disabled, every task should be processed + // sequentially and throw an IndexDoesNotExists. + let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // Nothing should be batched thus half of the tasks are processed. + handle.advance_n_failed_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); + + // Everything is processed. + handle.advance_n_failed_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + + // The index should not exist. + snapshot!(matches!(index_scheduler.index_exists("doggos"), Ok(true)), @"false"); +} + +#[test] +fn test_document_addition_cant_create_index_with_index() { + // We're going to autobatch multiple document addition that don't have + // the right to create an index while there is already an index. 
+ // Thus, everything should be batched together and no error should be + // throwed. + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + // Create the index. + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // Everything should be batched together. + handle.advance_n_successful_batches(1); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_10_tasks"); + + // Has everything being pushed successfully in milli? 
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_cant_create_index_with_index_without_autobatching() { + // We're going to execute multiple document addition that don't have + // the right to create an index while there is no index currently. + // Since the autobatching is disabled, every tasks should be processed + // sequentially and throw an IndexDoesNotExists. + let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); + + // Create the index. + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + 
+ // Nothing should be batched thus half of the tasks are processed. + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); + + // Everything is processed. + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + + // Has everything being pushed successfully in milli? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_mixed_rights_with_index() { + // We're going to autobatch multiple document addition. + // - The index already exists + // - The first document addition don't have the right to create an index + // can it batch with the other one? + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + // Create the index. 
+ index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + let allow_index_creation = i % 2 != 0; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // Everything should be batched together. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + + // Has everything being pushed successfully in milli? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_mixed_right_without_index_starts_with_cant_create() { + // We're going to autobatch multiple document addition. 
+ // - The index does not exists + // - The first document addition don't have the right to create an index + // - The second do. They should not batch together. + // - The second should batch with everything else as it's going to create an index. + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + let allow_index_creation = i % 2 != 0; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // A first batch should be processed with only the first documentAddition that's going to fail. + handle.advance_one_failed_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "only_first_task_failed"); + + // Everything else should be batched together. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + + // Has everything being pushed successfully in milli? 
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_with_multiple_primary_key() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for (id, primary_key) in ["id", "bork", "bloup"].iter().enumerate() { + let content = format!( + r#"{{ + "id": {id}, + "doggo": "jean bob" + }}"#, + ); + let (uuid, mut file) = + index_scheduler.queue.create_update_file_with_uuid(id as u128).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S(primary_key)), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_3_tasks"); + + // A first batch should be processed with only the first documentAddition. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "only_first_task_succeed"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_task_fails"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_fails"); + + // Is the primary key still what we expect? 
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); + snapshot!(primary_key, @"id"); + + // Is the document still the one we expect? + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_with_multiple_primary_key_batch_wrong_key() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for (id, primary_key) in ["id", "bork", "bork"].iter().enumerate() { + let content = format!( + r#"{{ + "id": {id}, + "doggo": "jean bob" + }}"#, + ); + let (uuid, mut file) = + index_scheduler.queue.create_update_file_with_uuid(id as u128).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S(primary_key)), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_3_tasks"); + + // A first batch should be processed with only the first documentAddition. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "only_first_task_succeed"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_and_third_tasks_fails"); + + // Is the primary key still what we expect?
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); + snapshot!(primary_key, @"id"); + + // Is the document still the one we expect? + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_with_bad_primary_key() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for (id, primary_key) in ["bork", "bork", "id", "bork", "id"].iter().enumerate() { + let content = format!( + r#"{{ + "id": {id}, + "doggo": "jean bob" + }}"#, + ); + let (uuid, mut file) = + index_scheduler.queue.create_update_file_with_uuid(id as u128).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S(primary_key)), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_5_tasks"); + + // A first batch should be processed with only the first two documentAddition. + // It should fail because the documents don't contain any `bork` field. + // NOTE: it's marked as successful because the batch didn't fail, it's the individual tasks that failed.
+ handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_and_second_task_fails"); + + // The primary key should be set to none since we failed the batch. + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap(); + snapshot!(primary_key.is_none(), @"true"); + + // The second batch should succeed and only contain one task. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_succeeds"); + + // The primary key should be set to `id` since this batch succeeded. + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); + snapshot!(primary_key, @"id"); + + // We're trying to `bork` again, but now there is already a primary key set for this index. + // NOTE: it's marked as successful because the batch didn't fail, it's the individual tasks that failed. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "fourth_task_fails"); + + // Finally the last task should succeed since its primary key is the same as the valid one. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "fifth_task_succeeds"); + + // Is the primary key still what we expect? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); + snapshot!(primary_key, @"id"); + + // Is the document still the one we expect?
+ let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_with_set_and_null_primary_key() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for (id, primary_key) in + [None, Some("bork"), Some("paw"), None, None, Some("paw")].into_iter().enumerate() + { + let content = format!( + r#"{{ + "paw": {id}, + "doggo": "jean bob" + }}"#, + ); + let (uuid, mut file) = + index_scheduler.queue.create_update_file_with_uuid(id as u128).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: primary_key.map(|pk| pk.to_string()), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_6_tasks"); + + // A first batch should contain only one task that fails because we can't infer the primary key. + // NOTE: it's marked as successful because the batch didn't fail, it's the individual tasks that failed. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_task_fails"); + + // The second batch should contain only one task that fails because `bork` is not a valid primary key. + // NOTE: it's marked as successful because the batch didn't fail, it's the individual tasks that failed.
+ handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_task_fails"); + + // No primary key should be set at this point. + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap(); + snapshot!(primary_key.is_none(), @"true"); + + // The third batch should succeed and only contain one task. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_succeeds"); + + // The primary key should be set to `paw` since this batch succeeded. + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); + snapshot!(primary_key, @"paw"); + + // We should be able to batch together the next two tasks that don't specify any primary key + // + the last task that matches the current primary-key. Everything should succeed. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_other_tasks_succeeds"); + + // Is the primary key still what we expect? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); + snapshot!(primary_key, @"paw"); + + // Is the document still the one we expect?
+ let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_with_set_and_null_primary_key_inference_works() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for (id, primary_key) in + [None, Some("bork"), Some("doggoid"), None, None, Some("doggoid")].into_iter().enumerate() + { + let content = format!( + r#"{{ + "doggoid": {id}, + "doggo": "jean bob" + }}"#, + ); + let (uuid, mut file) = + index_scheduler.queue.create_update_file_with_uuid(id as u128).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: primary_key.map(|pk| pk.to_string()), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_6_tasks"); + + // A first batch should contain only one task that succeeds and sets the primary key to `doggoid`. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_task_succeed"); + + // Checking the primary key. + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap(); + snapshot!(primary_key.is_none(), @"false"); + + // The second batch should contain only one task that fails because it tries to update the primary key to `bork`.
+ handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_task_fails"); + + // The third batch should succeed and only contain one task. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_succeeds"); + + // We should be able to batch together the next two tasks that don't specify any primary key + // + the last task that matches the current primary-key. Everything should succeed. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_other_tasks_succeeds"); + + // Is the primary key still what we expect? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); + snapshot!(primary_key, @"doggoid"); + + // Is the document still the one we expect? + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} diff --git a/crates/index-scheduler/src/scheduler/test_embedders.rs b/crates/index-scheduler/src/scheduler/test_embedders.rs new file mode 100644 index 000000000..5ec58bc53 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/test_embedders.rs @@ -0,0 +1,839 @@ +use std::collections::BTreeMap; + +use big_s::S; +use insta::assert_json_snapshot; +use meili_snap::{json_string, snapshot}; +use meilisearch_types::milli::index::IndexEmbeddingConfig; +use meilisearch_types::milli::update::Setting; +use meilisearch_types::milli::vector::settings::EmbeddingSettings; +use meilisearch_types::milli::{self, obkv_to_json}; +use meilisearch_types::settings::{SettingEmbeddingSettings, Settings, Unchecked}; +use
meilisearch_types::tasks::KindWithContent; +use milli::update::IndexDocumentsMethod::*; + +use crate::insta_snapshot::snapshot_index_scheduler; +use crate::test_utils::read_json; +use crate::IndexScheduler; + +#[test] +fn import_vectors() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let mut new_settings: Box<Settings<Unchecked>> = Box::default(); + let mut embedders = BTreeMap::default(); + let embedding_settings = milli::vector::settings::EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::Rest), + api_key: Setting::Set(S("My super secret")), + url: Setting::Set(S("http://localhost:7777")), + dimensions: Setting::Set(384), + request: Setting::Set(serde_json::json!("{{text}}")), + response: Setting::Set(serde_json::json!("{{embedding}}")), + ..Default::default() + }; + embedders.insert( + S("A_fakerest"), + SettingEmbeddingSettings { inner: Setting::Set(embedding_settings) }, + ); + + let embedding_settings = milli::vector::settings::EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace), + model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")), + revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")), + document_template: Setting::Set(S("{{doc.doggo}} the {{doc.breed}} best doggo")), + ..Default::default() + }; + embedders.insert( + S("B_small_hf"), + SettingEmbeddingSettings { inner: Setting::Set(embedding_settings) }, + ); + + new_settings.embedders = Setting::Set(embedders); + + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings, + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors"); + + { + let rtxn = index_scheduler.read_txn().unwrap(); + let task = index_scheduler.queue.tasks.get_task(&rtxn, 0).unwrap().unwrap(); + let task =
meilisearch_types::task_view::TaskView::from_task(&task); + insta::assert_json_snapshot!(task.details); + } + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors"); + + { + let rtxn = index_scheduler.read_txn().unwrap(); + let task = index_scheduler.queue.tasks.get_task(&rtxn, 0).unwrap().unwrap(); + let task = meilisearch_types::task_view::TaskView::from_task(&task); + insta::assert_json_snapshot!(task.details); + } + + let (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) = { + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + + let configs = index.embedding_configs(&rtxn).unwrap(); + // for consistency with the below + #[allow(clippy::get_first)] + let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } = + configs.get(0).unwrap(); + insta::assert_snapshot!(name, @"A_fakerest"); + insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); + insta::assert_json_snapshot!(fakerest_config.embedder_options); + let fakerest_name = name.clone(); + + let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } = + configs.get(1).unwrap(); + insta::assert_snapshot!(name, @"B_small_hf"); + insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); + insta::assert_json_snapshot!(simple_hf_config.embedder_options); + let simple_hf_name = name.clone(); + + let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap(); + let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); + let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap(); + let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo"), None).unwrap(); + let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo"), None).unwrap(); + (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) + }; + + // add one doc, specifying
vectors + + let doc = serde_json::json!( + { + "id": 0, + "doggo": "Intel", + "breed": "beagle", + "_vectors": { + &fakerest_name: { + // this will never trigger regeneration, which is good because we can't actually generate with + // this embedder + "regenerate": false, + "embeddings": beagle_embed, + }, + &simple_hf_name: { + // this will be regenerated on updates + "regenerate": true, + "embeddings": lab_embed, + }, + "noise": [0.1, 0.2, 0.3] + } + } + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0u128).unwrap(); + let documents_count = read_json(doc.to_string().as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel"); + + handle.advance_one_successful_batch(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "adding Intel succeeds"); + + // check embeddings + { + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + + // Ensure the document has been inserted into the relevant bitmap + let configs = index.embedding_configs(&rtxn).unwrap(); + // for consistency with the below + #[allow(clippy::get_first)] + let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } = + configs.get(0).unwrap(); + insta::assert_snapshot!(name, @"A_fakerest"); + insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>"); + + let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap(); + insta::assert_snapshot!(name, @"B_small_hf"); + insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); + + let embeddings = index.embeddings(&rtxn,
0).unwrap(); + + assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true"); + assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true"); + + let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1; + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let doc = obkv_to_json( + &[ + fields_ids_map.id("doggo").unwrap(), + fields_ids_map.id("breed").unwrap(), + fields_ids_map.id("_vectors").unwrap(), + ], + &fields_ids_map, + doc, + ) + .unwrap(); + assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"}); + } + + // update the doc, without specifying vectors + + let doc = serde_json::json!( + { + "id": 0, + "doggo": "kefir", + "breed": "patou", + } + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(1u128).unwrap(); + let documents_count = read_json(doc.to_string().as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: None, + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir succeeds"); + + { + // check embeddings + { + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + + // Ensure the document has been inserted into the relevant bitmap + let configs = index.embedding_configs(&rtxn).unwrap(); + // for consistency with the below + #[allow(clippy::get_first)] + let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } = + configs.get(0).unwrap(); + insta::assert_snapshot!(name, @"A_fakerest"); + insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>"); + +
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap(); + insta::assert_snapshot!(name, @"B_small_hf"); + insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); + + let embeddings = index.embeddings(&rtxn, 0).unwrap(); + + // automatically changed to patou because set to regenerate + assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true"); + // remained beagle + assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true"); + + let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1; + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let doc = obkv_to_json( + &[ + fields_ids_map.id("doggo").unwrap(), + fields_ids_map.id("breed").unwrap(), + fields_ids_map.id("_vectors").unwrap(), + ], + &fields_ids_map, + doc, + ) + .unwrap(); + assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"}); + } + } +} + +#[test] +fn import_vectors_first_and_embedder_later() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let content = serde_json::json!( + [ + { + "id": 0, + "doggo": "kefir", + }, + { + "id": 1, + "doggo": "intel", + "_vectors": { + "my_doggo_embedder": vec![1; 384], + "unknown embedder": vec![1, 2, 3], + } + }, + { + "id": 2, + "doggo": "max", + "_vectors": { + "my_doggo_embedder": { + "regenerate": false, + "embeddings": vec![2; 384], + }, + "unknown embedder": vec![4, 5], + }, + }, + { + "id": 3, + "doggo": "marcel", + "_vectors": { + "my_doggo_embedder": { + "regenerate": true, + "embeddings": vec![3; 384], + }, + }, + }, + { + "id": 4, + "doggo": "sora", + "_vectors": { + "my_doggo_embedder": { + "regenerate": true, + }, + }, + }, + ] + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0_u128).unwrap(); + let documents_count = + read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file).unwrap(); + snapshot!(documents_count, @"5"); + file.persist().unwrap();
+ + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: None, + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + snapshot!(serde_json::to_string(&documents).unwrap(), name: "documents after initial push"); + + let setting = meilisearch_types::settings::Settings::<Unchecked> { + embedders: Setting::Set(maplit::btreemap! { + S("my_doggo_embedder") => SettingEmbeddingSettings { inner: Setting::Set(EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace), + model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")), + revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")), + document_template: Setting::Set(S("{{doc.doggo}}")), + ..Default::default() + }) } + }), + ..Default::default() + }; + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings: Box::new(setting), + is_deletion: false, + allow_index_creation: false, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + handle.advance_one_successful_batch(); + index_scheduler.assert_internally_consistent(); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map,
ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + // all the vectors linked to the new specified embedder have been removed + // Only the unknown embedders stay in the document DB + snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###); + let conf = index.embedding_configs(&rtxn).unwrap(); + // even though we specified the vector for the ID 3, it shouldn't be marked + // as user provided since we explicitly marked it as NOT user provided. + snapshot!(format!("{conf:#?}"), @r###" + [ + IndexEmbeddingConfig { + name: "my_doggo_embedder", + config: EmbeddingConfig { + embedder_options: HuggingFace( + EmbedderOptions { + model: "sentence-transformers/all-MiniLM-L6-v2", + revision: Some( + "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + ), + distribution: None, + }, + ), + prompt: PromptData { + template: "{{doc.doggo}}", + max_bytes: Some( + 400, + ), + }, + quantized: None, + }, + user_provided: RoaringBitmap<[1, 2]>, + }, + ] + "###); + let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); + let embeddings = index.embeddings(&rtxn, docid).unwrap(); + let embedding = &embeddings["my_doggo_embedder"]; + assert!(!embedding.is_empty(), "{embedding:?}"); + + // the document with the id 3 should keep its original embedding + let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); + let embeddings = index.embeddings(&rtxn, docid).unwrap(); + let embeddings = &embeddings["my_doggo_embedder"]; + + snapshot!(embeddings.len(), @"1"); + assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]); + + // If we update marcel it should regenerate its embedding automatically + + let content = serde_json::json!( + [ + { + "id": 3, + "doggo": "marvel", + }, + { + "id": 4, + "doggo": "sorry", + }, + ] + ); + + let
(uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(1_u128).unwrap(); + let documents_count = + read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file).unwrap(); + snapshot!(documents_count, @"2"); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: None, + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + // the document with the id 3 should have its original embedding updated + let rtxn = index.read_txn().unwrap(); + let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); + let doc = index.documents(&rtxn, Some(docid)).unwrap()[0]; + let doc = obkv_to_json(&field_ids, &field_ids_map, doc.1).unwrap(); + snapshot!(json_string!(doc), @r###" + { + "id": 3, + "doggo": "marvel" + } + "###); + + let embeddings = index.embeddings(&rtxn, docid).unwrap(); + let embedding = &embeddings["my_doggo_embedder"]; + + assert!(!embedding.is_empty()); + assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]); + + // the document with the id 4 should generate an embedding + let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap(); + let embeddings = index.embeddings(&rtxn, docid).unwrap(); + let embedding = &embeddings["my_doggo_embedder"]; + + assert!(!embedding.is_empty()); +} + +#[test] +fn delete_document_containing_vector() { + // 1. Add an embedder + // 2. Push two documents containing a simple vector + // 3. Delete the first document + // 4. The user defined roaring bitmap shouldn't contain the id of the first document anymore + // 5. Clear the index + // 6.
The user defined roaring bitmap shouldn't contain the id of the second document + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let setting = meilisearch_types::settings::Settings::<Unchecked> { + embedders: Setting::Set(maplit::btreemap! { + S("manual") => SettingEmbeddingSettings { inner: Setting::Set(EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided), + dimensions: Setting::Set(3), + ..Default::default() + }) } + }), + ..Default::default() + }; + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings: Box::new(setting), + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + let content = serde_json::json!( + [ + { + "id": 0, + "doggo": "kefir", + "_vectors": { + "manual": vec![0, 0, 0], + } + }, + { + "id": 1, + "doggo": "intel", + "_vectors": { + "manual": vec![1, 1, 1], + } + }, + ] + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0_u128).unwrap(); + let documents_count = + read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file).unwrap(); + snapshot!(documents_count, @"2"); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: None, + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + index_scheduler + .register( + KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1")], + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids =
field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###); + let conf = index.embedding_configs(&rtxn).unwrap(); + snapshot!(format!("{conf:#?}"), @r###" + [ + IndexEmbeddingConfig { + name: "manual", + config: EmbeddingConfig { + embedder_options: UserProvided( + EmbedderOptions { + dimensions: 3, + distribution: None, + }, + ), + prompt: PromptData { + template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", + max_bytes: Some( + 400, + ), + }, + quantized: None, + }, + user_provided: RoaringBitmap<[0]>, + }, + ] + "###); + let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); + let embeddings = index.embeddings(&rtxn, docid).unwrap(); + let embedding = &embeddings["manual"]; + assert!(!embedding.is_empty(), "{embedding:?}"); + + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) + .unwrap(); + handle.advance_one_successful_batch(); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + snapshot!(serde_json::to_string(&documents).unwrap(), @"[]"); + let conf = index.embedding_configs(&rtxn).unwrap(); + snapshot!(format!("{conf:#?}"), @r###" + [ + IndexEmbeddingConfig { + name: "manual", + config: EmbeddingConfig { + embedder_options: UserProvided( + EmbedderOptions { + dimensions: 3, + distribution: None, + }, + ), + prompt: PromptData { + template: "{% for field in fields
%}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", + max_bytes: Some( + 400, + ), + }, + quantized: None, + }, + user_provided: RoaringBitmap<[]>, + }, + ] + "###); +} + +#[test] +fn delete_embedder_with_user_provided_vectors() { + // 1. Add two embedders + // 2. Push two documents containing a simple vector + // 3. The documents must not contain the vectors after the update as they are in the vectors db + // 3. Delete the embedders + // 4. The documents contain the vectors again + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let setting = meilisearch_types::settings::Settings:: { + embedders: Setting::Set(maplit::btreemap! { + S("manual") => SettingEmbeddingSettings { inner: Setting::Set(EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided), + dimensions: Setting::Set(3), + ..Default::default() + }) }, + S("my_doggo_embedder") => SettingEmbeddingSettings { inner: Setting::Set(EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace), + model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")), + revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")), + document_template: Setting::Set(S("{{doc.doggo}}")), + ..Default::default() + }) }, + }), + ..Default::default() + }; + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings: Box::new(setting), + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + let content = serde_json::json!( + [ + { + "id": 0, + "doggo": "kefir", + "_vectors": { + "manual": vec![0, 0, 0], + "my_doggo_embedder": vec![1; 384], + } + }, + { + "id": 1, + "doggo": "intel", + "_vectors": { + "manual": vec![1, 1, 1], + } + }, + ] + ); + + let (uuid, mut file) = 
index_scheduler.queue.create_update_file_with_uuid(0_u128).unwrap(); + let documents_count = + read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file).unwrap(); + snapshot!(documents_count, @"2"); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: None, + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + { + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel"}]"###); + } + + { + let setting = meilisearch_types::settings::Settings:: { + embedders: Setting::Set(maplit::btreemap! 
{ + S("manual") => SettingEmbeddingSettings { inner: Setting::Reset }, + }), + ..Default::default() + }; + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings: Box::new(setting), + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + } + + { + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###); + } + + { + let setting = meilisearch_types::settings::Settings:: { + embedders: Setting::Reset, + ..Default::default() + }; + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings: Box::new(setting), + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + } + + { + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + + // FIXME: redaction + snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), 
@r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"embeddings\":[[0.0,0.0,0.0]],\"regenerate\":false},\"my_doggo_embedder\":{\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]],\"regenerate\":false}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"embeddings\":[[1.0,1.0,1.0]],\"regenerate\":false}}}]""###); + } +} diff --git a/crates/index-scheduler/src/scheduler/test_failure.rs b/crates/index-scheduler/src/scheduler/test_failure.rs new file mode 100644 index 
000000000..cf835daa3 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/test_failure.rs @@ -0,0 +1,251 @@ +use std::time::Instant; + +use big_s::S; +use maplit::btreeset; +use meili_snap::snapshot; +use meilisearch_types::milli::obkv_to_json; +use meilisearch_types::milli::update::IndexDocumentsMethod::*; +use meilisearch_types::milli::update::Setting; +use meilisearch_types::tasks::KindWithContent; + +use crate::insta_snapshot::snapshot_index_scheduler; +use crate::test_utils::Breakpoint::*; +use crate::test_utils::{index_creation_task, read_json, FailureLocation}; +use crate::IndexScheduler; + +#[test] +fn fail_in_process_batch_for_index_creation() { + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(1, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + + let _task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); + + handle.advance_one_failed_batch(); + + // Still in the first iteration + assert_eq!(*index_scheduler.run_loop_iteration.read().unwrap(), 1); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed"); +} + +#[test] +fn fail_in_process_batch_for_document_addition() { + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(1, FailureLocation::InsideProcessBatch)]); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: 
"registered_the_first_task"); + handle.advance_till([Start, BatchCreated]); + + snapshot!( + snapshot_index_scheduler(&index_scheduler), + name: "document_addition_batch_created" + ); + + handle.advance_till([ProcessBatchFailed, AfterProcessing]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "document_addition_failed"); +} + +#[test] +fn fail_in_update_task_after_process_batch_success_for_document_addition() { + let (index_scheduler, mut handle) = IndexScheduler::test( + true, + vec![(1, FailureLocation::UpdatingTaskAfterProcessBatchSuccess { task_uid: 0 })], + ); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + handle.advance_till([Start]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "document_addition_succeeded_but_index_scheduler_not_updated"); + + handle.advance_till([BatchCreated, InsideProcessBatch, ProcessBatchSucceeded]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_succeeded"); + + // At this point the next time the scheduler will try to progress it should encounter + // a critical failure and have to wait for 1s before retrying anything. 
+ + let before_failure = Instant::now(); + handle.advance_till([Start]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_to_commit"); + let failure_duration = before_failure.elapsed(); + assert!(failure_duration.as_millis() >= 1000); + + handle.advance_till([BatchCreated, InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_successfully_processed"); +} + +#[test] +fn fail_in_process_batch_for_document_deletion() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + use meilisearch_types::settings::{Settings, Unchecked}; + let mut new_settings: Box> = Box::default(); + new_settings.filterable_attributes = Setting::Set(btreeset!(S("catto"))); + + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings, + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + + let content = r#"[ + { "id": 1, "doggo": "jean bob" }, + { "id": 2, "catto": "jorts" }, + { "id": 3, "doggo": "bork" } + ]"#; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_setting_and_document_addition"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_settings"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_documents"); + + index_scheduler + .register( + 
KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1")], + }, + None, + false, + ) + .unwrap(); + // This one should not be catched by Meilisearch but it's still nice to handle it because if one day we break the filters it could happens + index_scheduler + .register( + KindWithContent::DocumentDeletionByFilter { + index_uid: S("doggos"), + filter_expr: serde_json::json!(true), + }, + None, + false, + ) + .unwrap(); + // Should fail because the ids are not filterable + index_scheduler + .register( + KindWithContent::DocumentDeletionByFilter { + index_uid: S("doggos"), + filter_expr: serde_json::json!("id = 2"), + }, + None, + false, + ) + .unwrap(); + index_scheduler + .register( + KindWithContent::DocumentDeletionByFilter { + index_uid: S("doggos"), + filter_expr: serde_json::json!("catto EXISTS"), + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_document_deletions"); + + // Everything should be batched together + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_removing_the_documents"); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents_remaining_should_only_be_bork"); +} + +#[test] +fn panic_in_process_batch_for_index_creation() { + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(1, FailureLocation::PanicInsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + + let _task = index_scheduler.register(kind, None, false).unwrap(); + 
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + handle.advance_till([Start, BatchCreated, ProcessBatchFailed, AfterProcessing]); + + // Still in the first iteration + assert_eq!(*index_scheduler.run_loop_iteration.read().unwrap(), 1); + // No matter what happens in process_batch, the index_scheduler should be internally consistent + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed"); +} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap deleted file mode 100644 index 2b56b71d1..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap +++ /dev/null @@ -1,9 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 3, - "doggo": "bork" - } -] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap deleted file mode 100644 index 45065d8b1..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap +++ /dev/null @@ -1,43 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -### Autobatching Enabled = true -### Processing Tasks: -[] ----------------------------------------------------------------------- -### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: 
NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} -1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} ----------------------------------------------------------------------- -### Status: -enqueued [1,] -succeeded [0,] ----------------------------------------------------------------------- -### Kind: -"documentAdditionOrUpdate" [1,] -"settingsUpdate" [0,] ----------------------------------------------------------------------- -### Index Tasks: -doggos [0,1,] ----------------------------------------------------------------------- -### Index Mapper: -doggos: { number_of_documents: 0, field_distribution: {} } - ----------------------------------------------------------------------- -### Canceled By: - ----------------------------------------------------------------------- -### Enqueued At: -[timestamp] [0,] -[timestamp] [1,] ----------------------------------------------------------------------- -### Started At: 
-[timestamp] [0,] ----------------------------------------------------------------------- -### Finished At: -[timestamp] [0,] ----------------------------------------------------------------------- -### File Store: -00000000-0000-0000-0000-000000000000 - ----------------------------------------------------------------------- diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap deleted file mode 100644 index 2b56b71d1..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap +++ /dev/null @@ -1,9 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 3, - "doggo": "bork" - } -] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/documents.snap deleted file mode 100644 index 96f9d447f..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/documents.snap +++ /dev/null @@ -1,9 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 0, - "doggo": "jean bob" - } -] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap deleted file mode 100644 index 96f9d447f..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap +++ /dev/null @@ -1,9 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 0, - "doggo": "jean bob" 
- } -] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/documents.snap deleted file mode 100644 index 5a839838d..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/documents.snap +++ /dev/null @@ -1,45 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 0, - "doggo": "bob 0" - }, - { - "id": 1, - "doggo": "bob 1" - }, - { - "id": 2, - "doggo": "bob 2" - }, - { - "id": 3, - "doggo": "bob 3" - }, - { - "id": 4, - "doggo": "bob 4" - }, - { - "id": 5, - "doggo": "bob 5" - }, - { - "id": 6, - "doggo": "bob 6" - }, - { - "id": 7, - "doggo": "bob 7" - }, - { - "id": 8, - "doggo": "bob 8" - }, - { - "id": 9, - "doggo": "bob 9" - } -] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/documents.snap deleted file mode 100644 index 5a839838d..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/documents.snap +++ /dev/null @@ -1,45 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 0, - "doggo": "bob 0" - }, - { - "id": 1, - "doggo": "bob 1" - }, - { - "id": 2, - "doggo": "bob 2" - }, - { - "id": 3, - "doggo": "bob 3" - }, - { - "id": 4, - "doggo": "bob 4" - }, - { - "id": 5, - "doggo": "bob 5" - }, - { - "id": 6, - "doggo": "bob 6" - }, - { - "id": 7, - "doggo": "bob 7" - }, - { - "id": 8, - "doggo": "bob 8" - }, - { - "id": 9, - "doggo": "bob 9" - } -] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/documents.snap deleted file mode 100644 index 5a839838d..000000000 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/documents.snap +++ /dev/null @@ -1,45 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 0, - "doggo": "bob 0" - }, - { - "id": 1, - "doggo": "bob 1" - }, - { - "id": 2, - "doggo": "bob 2" - }, - { - "id": 3, - "doggo": "bob 3" - }, - { - "id": 4, - "doggo": "bob 4" - }, - { - "id": 5, - "doggo": "bob 5" - }, - { - "id": 6, - "doggo": "bob 6" - }, - { - "id": 7, - "doggo": "bob 7" - }, - { - "id": 8, - "doggo": "bob 8" - }, - { - "id": 9, - "doggo": "bob 9" - } -] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/documents.snap deleted file mode 100644 index 5a839838d..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/documents.snap +++ /dev/null @@ -1,45 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 0, - "doggo": "bob 0" - }, - { - "id": 1, - "doggo": "bob 1" - }, - { - "id": 2, - "doggo": "bob 2" - }, - { - "id": 3, - "doggo": "bob 3" - }, - { - "id": 4, - "doggo": "bob 4" - }, - { - "id": 5, - "doggo": "bob 5" - }, - { - "id": 6, - "doggo": "bob 6" - }, - { - "id": 7, - "doggo": "bob 7" - }, - { - "id": 8, - "doggo": "bob 8" - }, - { - "id": 9, - "doggo": "bob 9" - } -] diff --git a/crates/index-scheduler/src/test_utils.rs b/crates/index-scheduler/src/test_utils.rs new file mode 100644 index 000000000..4be944037 --- /dev/null +++ b/crates/index-scheduler/src/test_utils.rs @@ -0,0 +1,352 @@ +use std::io::{BufWriter, Write}; +use std::sync::Arc; + +use file_store::File; +use meilisearch_types::document_formats::DocumentFormatError; +use meilisearch_types::milli::update::IndexDocumentsMethod::ReplaceDocuments; +use uuid::Uuid; + +use crate::insta_snapshot::snapshot_index_scheduler; +use crate::{Error, IndexScheduler, IndexSchedulerOptions}; + +#[derive(Debug, Clone, Copy, 
PartialEq, Eq)] +pub(crate) enum Breakpoint { + // this state is only encountered while creating the scheduler in the test suite. + Init, + + Start, + BatchCreated, + AfterProcessing, + AbortedIndexation, + ProcessBatchSucceeded, + ProcessBatchFailed, + InsideProcessBatch, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum FailureLocation { + InsideCreateBatch, + InsideProcessBatch, + PanicInsideProcessBatch, + AcquiringWtxn, + UpdatingTaskAfterProcessBatchSuccess { task_uid: u32 }, + UpdatingTaskAfterProcessBatchFailure, + CommittingWtxn, +} + +use big_s::S; +use crossbeam_channel::RecvTimeoutError; +use meilisearch_types::milli::update::IndexerConfig; +use meilisearch_types::tasks::KindWithContent; +use meilisearch_types::VERSION_FILE_NAME; +use tempfile::{NamedTempFile, TempDir}; +use Breakpoint::*; + +impl IndexScheduler { + /// Blocks the thread until the test handle asks to progress to/through this breakpoint. + /// + /// Two messages are sent through the channel for each breakpoint. + /// The first message is `(b, false)` and the second message is `(b, true)`. + /// + /// Since the channel has a capacity of zero, the `send` and `recv` calls wait for each other. + /// So when the index scheduler calls `test_breakpoint_sdr.send(b, false)`, it blocks + /// the thread until the test catches up by calling `test_breakpoint_rcv.recv()` enough. + /// From the test side, we call `recv()` repeatedly until we find the message `(breakpoint, false)`. + /// As soon as we find it, the index scheduler is unblocked but then wait again on the call to + /// `test_breakpoint_sdr.send(b, true)`. This message will only be able to send once the + /// test asks to progress to the next `(b2, false)`. + #[cfg(test)] + pub(crate) fn breakpoint(&self, b: Breakpoint) { + // We send two messages. The first one will sync with the call + // to `handle.wait_until(b)`. The second one will block until the + // the next call to `handle.wait_until(..)`. 
+ self.test_breakpoint_sdr.send((b, false)).unwrap(); + // This one will only be able to be sent if the test handle stays alive. + // If it fails, then it means that we have exited the test. + // By crashing with `unwrap`, we kill the run loop. + self.test_breakpoint_sdr.send((b, true)).unwrap(); + } +} + +impl IndexScheduler { + pub(crate) fn test( + autobatching_enabled: bool, + planned_failures: Vec<(usize, FailureLocation)>, + ) -> (Self, IndexSchedulerHandle) { + Self::test_with_custom_config(planned_failures, |config| { + config.autobatching_enabled = autobatching_enabled; + }) + } + + pub(crate) fn test_with_custom_config( + planned_failures: Vec<(usize, FailureLocation)>, + configuration: impl Fn(&mut IndexSchedulerOptions), + ) -> (Self, IndexSchedulerHandle) { + let tempdir = TempDir::new().unwrap(); + let (sender, receiver) = crossbeam_channel::bounded(0); + + let indexer_config = IndexerConfig { skip_index_budget: true, ..Default::default() }; + + let mut options = IndexSchedulerOptions { + version_file_path: tempdir.path().join(VERSION_FILE_NAME), + auth_path: tempdir.path().join("auth"), + tasks_path: tempdir.path().join("db_path"), + update_file_path: tempdir.path().join("file_store"), + indexes_path: tempdir.path().join("indexes"), + snapshots_path: tempdir.path().join("snapshots"), + dumps_path: tempdir.path().join("dumps"), + webhook_url: None, + webhook_authorization_header: None, + task_db_size: 1000 * 1000 * 10, // 10 MB, we don't use MiB on purpose. + index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. 
+ enable_mdb_writemap: false, + index_growth_amount: 1000 * 1000 * 1000 * 1000, // 1 TB + index_count: 5, + indexer_config: Arc::new(indexer_config), + autobatching_enabled: true, + cleanup_enabled: true, + max_number_of_tasks: 1_000_000, + max_number_of_batched_tasks: usize::MAX, + batched_tasks_size_limit: u64::MAX, + instance_features: Default::default(), + }; + configuration(&mut options); + + let index_scheduler = Self::new(options, sender, planned_failures).unwrap(); + + // To be 100% consistent between all test we're going to start the scheduler right now + // and ensure it's in the expected starting state. + let breakpoint = match receiver.recv_timeout(std::time::Duration::from_secs(10)) { + Ok(b) => b, + Err(RecvTimeoutError::Timeout) => { + panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.") + } + Err(RecvTimeoutError::Disconnected) => panic!("The scheduler crashed."), + }; + assert_eq!(breakpoint, (Init, false)); + let index_scheduler_handle = IndexSchedulerHandle { + _tempdir: tempdir, + index_scheduler: index_scheduler.private_clone(), + test_breakpoint_rcv: receiver, + last_breakpoint: breakpoint.0, + }; + + (index_scheduler, index_scheduler_handle) + } + + /// Return a [`PlannedFailure`](Error::PlannedFailure) error if a failure is planned + /// for the given location and current run loop iteration. 
+ pub(crate) fn maybe_fail(&self, location: FailureLocation) -> crate::Result<()> { + if self.planned_failures.contains(&(*self.run_loop_iteration.read().unwrap(), location)) { + match location { + FailureLocation::PanicInsideProcessBatch => { + panic!("simulated panic") + } + _ => Err(Error::PlannedFailure), + } + } else { + Ok(()) + } + } +} + +/// Return a `KindWithContent::IndexCreation` task +pub(crate) fn index_creation_task( + index: &'static str, + primary_key: &'static str, +) -> KindWithContent { + KindWithContent::IndexCreation { index_uid: S(index), primary_key: Some(S(primary_key)) } +} + +/// Create a `KindWithContent::DocumentImport` task that imports documents. +/// +/// - `index_uid` is given as parameter +/// - `primary_key` is given as parameter +/// - `method` is set to `ReplaceDocuments` +/// - `content_file` is given as parameter +/// - `documents_count` is given as parameter +/// - `allow_index_creation` is set to `true` +pub(crate) fn replace_document_import_task( + index: &'static str, + primary_key: Option<&'static str>, + content_file_uuid: u128, + documents_count: u64, +) -> KindWithContent { + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S(index), + primary_key: primary_key.map(ToOwned::to_owned), + method: ReplaceDocuments, + content_file: Uuid::from_u128(content_file_uuid), + documents_count, + allow_index_creation: true, + } +} + +/// Adapting to the new json reading interface +pub(crate) fn read_json( + bytes: &[u8], + write: impl Write, +) -> std::result::Result { + let temp_file = NamedTempFile::new().unwrap(); + let mut buffer = BufWriter::new(temp_file.reopen().unwrap()); + buffer.write_all(bytes).unwrap(); + buffer.flush().unwrap(); + meilisearch_types::document_formats::read_json(temp_file.as_file(), write) +} + +/// Create an update file with the given file uuid. +/// +/// The update file contains just one simple document whose id is given by `document_id`. 
+/// +/// The uuid of the file and its documents count is returned. +pub(crate) fn sample_documents( + index_scheduler: &IndexScheduler, + file_uuid: u128, + document_id: usize, +) -> (File, u64) { + let content = format!( + r#" + {{ + "id" : "{document_id}" + }}"# + ); + + let (_uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(file_uuid).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + (file, documents_count) +} + +pub struct IndexSchedulerHandle { + _tempdir: TempDir, + index_scheduler: IndexScheduler, + test_breakpoint_rcv: crossbeam_channel::Receiver<(Breakpoint, bool)>, + last_breakpoint: Breakpoint, +} + +impl IndexSchedulerHandle { + /// Advance the scheduler to the next tick. + /// Panic + /// * If the scheduler is waiting for a task to be registered. + /// * If the breakpoint queue is in a bad state. + #[track_caller] + pub(crate) fn advance(&mut self) -> Breakpoint { + let (breakpoint_1, b) = match self + .test_breakpoint_rcv + .recv_timeout(std::time::Duration::from_secs(50)) + { + Ok(b) => b, + Err(RecvTimeoutError::Timeout) => { + let state = snapshot_index_scheduler(&self.index_scheduler); + panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}") + } + Err(RecvTimeoutError::Disconnected) => { + let state = snapshot_index_scheduler(&self.index_scheduler); + panic!("The scheduler crashed.\n{state}") + } + }; + // if we've already encountered a breakpoint we're supposed to be stuck on the false + // and we expect the same variant with the true to come now. + assert_eq!( + (breakpoint_1, b), + (self.last_breakpoint, true), + "Internal error in the test suite. 
In the previous iteration I got `({:?}, false)` and now I got `({:?}, {:?})`.", + self.last_breakpoint, + breakpoint_1, + b, + ); + + let (breakpoint_2, b) = match self + .test_breakpoint_rcv + .recv_timeout(std::time::Duration::from_secs(50)) + { + Ok(b) => b, + Err(RecvTimeoutError::Timeout) => { + let state = snapshot_index_scheduler(&self.index_scheduler); + panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}") + } + Err(RecvTimeoutError::Disconnected) => { + let state = snapshot_index_scheduler(&self.index_scheduler); + panic!("The scheduler crashed.\n{state}") + } + }; + assert!(!b, "Found the breakpoint handle in a bad state. Check your test suite"); + + self.last_breakpoint = breakpoint_2; + + breakpoint_2 + } + + /// Advance the scheduler until all the provided breakpoints are reached in order. + #[track_caller] + pub(crate) fn advance_till(&mut self, breakpoints: impl IntoIterator) { + for breakpoint in breakpoints { + let b = self.advance(); + assert_eq!( + b, + breakpoint, + "Was expecting the breakpoint `{:?}` but instead got `{:?}`.\n{}", + breakpoint, + b, + snapshot_index_scheduler(&self.index_scheduler) + ); + } + } + + /// Wait for `n` successful batches. + #[track_caller] + pub(crate) fn advance_n_successful_batches(&mut self, n: usize) { + for _ in 0..n { + self.advance_one_successful_batch(); + } + } + + /// Wait for `n` failed batches. + #[track_caller] + pub(crate) fn advance_n_failed_batches(&mut self, n: usize) { + for _ in 0..n { + self.advance_one_failed_batch(); + } + } + + // Wait for one successful batch. + #[track_caller] + pub(crate) fn advance_one_successful_batch(&mut self) { + self.advance_till([Start, BatchCreated]); + loop { + match self.advance() { + // the process_batch function can call itself recursively, thus we need to + // accept as may InsideProcessBatch as possible before moving to the next state. 
+ InsideProcessBatch => (), + // the batch went successfully, we can stop the loop and go on with the next states. + ProcessBatchSucceeded => break, + AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)), + ProcessBatchFailed => { + while self.advance() != Start {} + panic!("The batch failed.\n{}", snapshot_index_scheduler(&self.index_scheduler)) + }, + breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), + } + } + + self.advance_till([AfterProcessing]); + } + + // Wait for one failed batch. + #[track_caller] + pub(crate) fn advance_one_failed_batch(&mut self) { + self.advance_till([Start, BatchCreated]); + loop { + match self.advance() { + // the process_batch function can call itself recursively, thus we need to + // accept as may InsideProcessBatch as possible before moving to the next state. + InsideProcessBatch => (), + // the batch went failed, we can stop the loop and go on with the next states. + ProcessBatchFailed => break, + ProcessBatchSucceeded => panic!("The batch succeeded. 
(and it wasn't supposed to sorry)\n{}", snapshot_index_scheduler(&self.index_scheduler)), + AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)), + breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), + } + } + self.advance_till([AfterProcessing]); + } +} diff --git a/crates/index-scheduler/src/utils.rs b/crates/index-scheduler/src/utils.rs index 1fcedfddf..1f861776f 100644 --- a/crates/index-scheduler/src/utils.rs +++ b/crates/index-scheduler/src/utils.rs @@ -4,15 +4,14 @@ use std::collections::{BTreeSet, HashSet}; use std::ops::Bound; use meilisearch_types::batches::{Batch, BatchId, BatchStats}; -use meilisearch_types::heed::types::DecodeIgnore; use meilisearch_types::heed::{Database, RoTxn, RwTxn}; use meilisearch_types::milli::CboRoaringBitmapCodec; use meilisearch_types::task_view::DetailsView; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status}; -use roaring::{MultiOps, RoaringBitmap}; +use roaring::RoaringBitmap; use time::OffsetDateTime; -use crate::{Error, IndexScheduler, ProcessingTasks, Result, Task, TaskId, BEI128}; +use crate::{Error, Result, Task, TaskId, BEI128}; /// This structure contains all the information required to write a batch in the database without reading the tasks. /// It'll stay in RAM so it must be small. @@ -22,7 +21,7 @@ use crate::{Error, IndexScheduler, ProcessingTasks, Result, Task, TaskId, BEI128 /// 3. Call `finished` once the batch has been processed. /// 4. Call `update` on all the tasks. 
#[derive(Debug, Clone)] -pub(crate) struct ProcessingBatch { +pub struct ProcessingBatch { pub uid: BatchId, pub details: DetailsView, pub stats: BatchStats, @@ -143,349 +142,6 @@ impl ProcessingBatch { } } -impl IndexScheduler { - pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result { - enum_iterator::all().map(|s| self.get_status(rtxn, s)).union() - } - - pub(crate) fn all_batch_ids(&self, rtxn: &RoTxn) -> Result { - enum_iterator::all().map(|s| self.get_batch_status(rtxn, s)).union() - } - - pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result> { - Ok(self.all_tasks.remap_data_type::().last(rtxn)?.map(|(k, _)| k + 1)) - } - - pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result { - Ok(self.last_task_id(rtxn)?.unwrap_or_default()) - } - - pub(crate) fn next_batch_id(&self, rtxn: &RoTxn) -> Result { - Ok(self - .all_batches - .remap_data_type::() - .last(rtxn)? - .map(|(k, _)| k + 1) - .unwrap_or_default()) - } - - pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result> { - Ok(self.all_tasks.get(rtxn, &task_id)?) - } - - pub(crate) fn get_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result> { - Ok(self.all_batches.get(rtxn, &batch_id)?) 
- } - - pub(crate) fn write_batch( - &self, - wtxn: &mut RwTxn, - batch: ProcessingBatch, - tasks: &RoaringBitmap, - ) -> Result<()> { - self.all_batches.put( - wtxn, - &batch.uid, - &Batch { - uid: batch.uid, - progress: None, - details: batch.details, - stats: batch.stats, - started_at: batch.started_at, - finished_at: batch.finished_at, - }, - )?; - self.batch_to_tasks_mapping.put(wtxn, &batch.uid, tasks)?; - - for status in batch.statuses { - self.update_batch_status(wtxn, status, |bitmap| { - bitmap.insert(batch.uid); - })?; - } - - for kind in batch.kinds { - self.update_batch_kind(wtxn, kind, |bitmap| { - bitmap.insert(batch.uid); - })?; - } - - for index in batch.indexes { - self.update_batch_index(wtxn, &index, |bitmap| { - bitmap.insert(batch.uid); - })?; - } - - if let Some(enqueued_at) = batch.oldest_enqueued_at { - insert_task_datetime(wtxn, self.batch_enqueued_at, enqueued_at, batch.uid)?; - } - if let Some(enqueued_at) = batch.earliest_enqueued_at { - insert_task_datetime(wtxn, self.batch_enqueued_at, enqueued_at, batch.uid)?; - } - insert_task_datetime(wtxn, self.batch_started_at, batch.started_at, batch.uid)?; - insert_task_datetime(wtxn, self.batch_finished_at, batch.finished_at.unwrap(), batch.uid)?; - - Ok(()) - } - - /// Convert an iterator to a `Vec` of tasks and edit the `ProcessingBatch` to add the given tasks. - /// - /// The tasks MUST exist, or a `CorruptedTaskQueue` error will be thrown. - pub(crate) fn get_existing_tasks_for_processing_batch( - &self, - rtxn: &RoTxn, - processing_batch: &mut ProcessingBatch, - tasks: impl IntoIterator, - ) -> Result> { - tasks - .into_iter() - .map(|task_id| { - let mut task = self - .get_task(rtxn, task_id) - .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)); - processing_batch.processing(&mut task); - task - }) - .collect::>() - } - - /// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a - /// `CorruptedTaskQueue` error will be thrown. 
- pub(crate) fn get_existing_tasks( - &self, - rtxn: &RoTxn, - tasks: impl IntoIterator, - ) -> Result> { - tasks - .into_iter() - .map(|task_id| { - self.get_task(rtxn, task_id).and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) - }) - .collect::>() - } - - /// Convert an iterator to a `Vec` of batches. The batches MUST exist or a - /// `CorruptedTaskQueue` error will be thrown. - pub(crate) fn get_existing_batches( - &self, - rtxn: &RoTxn, - processing: &ProcessingTasks, - tasks: impl IntoIterator, - ) -> Result> { - tasks - .into_iter() - .map(|batch_id| { - if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) { - let mut batch = processing.batch.as_ref().unwrap().to_batch(); - batch.progress = processing.get_progress_view(); - Ok(batch) - } else { - self.get_batch(rtxn, batch_id) - .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) - } - }) - .collect::>() - } - - pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> { - let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?; - - debug_assert!(old_task != *task); - debug_assert_eq!(old_task.uid, task.uid); - debug_assert!(old_task.batch_uid.is_none() && task.batch_uid.is_some()); - - if old_task.status != task.status { - self.update_status(wtxn, old_task.status, |bitmap| { - bitmap.remove(task.uid); - })?; - self.update_status(wtxn, task.status, |bitmap| { - bitmap.insert(task.uid); - })?; - } - - if old_task.kind.as_kind() != task.kind.as_kind() { - self.update_kind(wtxn, old_task.kind.as_kind(), |bitmap| { - bitmap.remove(task.uid); - })?; - self.update_kind(wtxn, task.kind.as_kind(), |bitmap| { - bitmap.insert(task.uid); - })?; - } - - assert_eq!( - old_task.enqueued_at, task.enqueued_at, - "Cannot update a task's enqueued_at time" - ); - if old_task.started_at != task.started_at { - assert!(old_task.started_at.is_none(), "Cannot update a task's started_at time"); - if let Some(started_at) = task.started_at { - 
insert_task_datetime(wtxn, self.started_at, started_at, task.uid)?; - } - } - if old_task.finished_at != task.finished_at { - assert!(old_task.finished_at.is_none(), "Cannot update a task's finished_at time"); - if let Some(finished_at) = task.finished_at { - insert_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?; - } - } - - self.all_tasks.put(wtxn, &task.uid, task)?; - Ok(()) - } - - /// Returns the whole set of tasks that belongs to this batch. - pub(crate) fn tasks_in_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result { - Ok(self.batch_to_tasks_mapping.get(rtxn, &batch_id)?.unwrap_or_default()) - } - - /// Returns the whole set of tasks that belongs to this index. - pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result { - Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default()) - } - - pub(crate) fn update_index( - &self, - wtxn: &mut RwTxn, - index: &str, - f: impl Fn(&mut RoaringBitmap), - ) -> Result<()> { - let mut tasks = self.index_tasks(wtxn, index)?; - f(&mut tasks); - if tasks.is_empty() { - self.index_tasks.delete(wtxn, index)?; - } else { - self.index_tasks.put(wtxn, index, &tasks)?; - } - - Ok(()) - } - - /// Returns the whole set of batches that belongs to this index. 
- pub(crate) fn index_batches(&self, rtxn: &RoTxn, index: &str) -> Result { - Ok(self.batch_index_tasks.get(rtxn, index)?.unwrap_or_default()) - } - - pub(crate) fn update_batch_index( - &self, - wtxn: &mut RwTxn, - index: &str, - f: impl Fn(&mut RoaringBitmap), - ) -> Result<()> { - let mut batches = self.index_batches(wtxn, index)?; - f(&mut batches); - if batches.is_empty() { - self.batch_index_tasks.delete(wtxn, index)?; - } else { - self.batch_index_tasks.put(wtxn, index, &batches)?; - } - - Ok(()) - } - - pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result { - Ok(self.status.get(rtxn, &status)?.unwrap_or_default()) - } - - pub(crate) fn put_status( - &self, - wtxn: &mut RwTxn, - status: Status, - bitmap: &RoaringBitmap, - ) -> Result<()> { - Ok(self.status.put(wtxn, &status, bitmap)?) - } - - pub(crate) fn update_status( - &self, - wtxn: &mut RwTxn, - status: Status, - f: impl Fn(&mut RoaringBitmap), - ) -> Result<()> { - let mut tasks = self.get_status(wtxn, status)?; - f(&mut tasks); - self.put_status(wtxn, status, &tasks)?; - - Ok(()) - } - - pub(crate) fn get_batch_status(&self, rtxn: &RoTxn, status: Status) -> Result { - Ok(self.batch_status.get(rtxn, &status)?.unwrap_or_default()) - } - - pub(crate) fn put_batch_status( - &self, - wtxn: &mut RwTxn, - status: Status, - bitmap: &RoaringBitmap, - ) -> Result<()> { - Ok(self.batch_status.put(wtxn, &status, bitmap)?) - } - - pub(crate) fn update_batch_status( - &self, - wtxn: &mut RwTxn, - status: Status, - f: impl Fn(&mut RoaringBitmap), - ) -> Result<()> { - let mut tasks = self.get_batch_status(wtxn, status)?; - f(&mut tasks); - self.put_batch_status(wtxn, status, &tasks)?; - - Ok(()) - } - - pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result { - Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default()) - } - - pub(crate) fn put_kind( - &self, - wtxn: &mut RwTxn, - kind: Kind, - bitmap: &RoaringBitmap, - ) -> Result<()> { - Ok(self.kind.put(wtxn, &kind, bitmap)?) 
- } - - pub(crate) fn update_kind( - &self, - wtxn: &mut RwTxn, - kind: Kind, - f: impl Fn(&mut RoaringBitmap), - ) -> Result<()> { - let mut tasks = self.get_kind(wtxn, kind)?; - f(&mut tasks); - self.put_kind(wtxn, kind, &tasks)?; - - Ok(()) - } - - pub(crate) fn get_batch_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result { - Ok(self.batch_kind.get(rtxn, &kind)?.unwrap_or_default()) - } - - pub(crate) fn put_batch_kind( - &self, - wtxn: &mut RwTxn, - kind: Kind, - bitmap: &RoaringBitmap, - ) -> Result<()> { - Ok(self.batch_kind.put(wtxn, &kind, bitmap)?) - } - - pub(crate) fn update_batch_kind( - &self, - wtxn: &mut RwTxn, - kind: Kind, - f: impl Fn(&mut RoaringBitmap), - ) -> Result<()> { - let mut tasks = self.get_batch_kind(wtxn, kind)?; - f(&mut tasks); - self.put_batch_kind(wtxn, kind, &tasks)?; - - Ok(()) - } -} - pub(crate) fn insert_task_datetime( wtxn: &mut RwTxn, database: Database, @@ -651,11 +307,11 @@ pub fn clamp_to_page_size(size: usize) -> usize { } #[cfg(test)] -impl IndexScheduler { +impl crate::IndexScheduler { /// Asserts that the index scheduler's content is internally consistent. 
pub fn assert_internally_consistent(&self) { let rtxn = self.env.read_txn().unwrap(); - for task in self.all_tasks.iter(&rtxn).unwrap() { + for task in self.queue.tasks.all_tasks.iter(&rtxn).unwrap() { let (task_id, task) = task.unwrap(); let task_index_uid = task.index_uid().map(ToOwned::to_owned); @@ -674,6 +330,7 @@ impl IndexScheduler { assert_eq!(uid, task.uid); if let Some(ref batch) = batch_uid { assert!(self + .queue .batch_to_tasks_mapping .get(&rtxn, batch) .unwrap() @@ -682,17 +339,26 @@ impl IndexScheduler { } if let Some(task_index_uid) = &task_index_uid { assert!(self + .queue + .tasks .index_tasks .get(&rtxn, task_index_uid.as_str()) .unwrap() .unwrap() .contains(task.uid)); } - let db_enqueued_at = - self.enqueued_at.get(&rtxn, &enqueued_at.unix_timestamp_nanos()).unwrap().unwrap(); + let db_enqueued_at = self + .queue + .tasks + .enqueued_at + .get(&rtxn, &enqueued_at.unix_timestamp_nanos()) + .unwrap() + .unwrap(); assert!(db_enqueued_at.contains(task_id)); if let Some(started_at) = started_at { let db_started_at = self + .queue + .tasks .started_at .get(&rtxn, &started_at.unix_timestamp_nanos()) .unwrap() @@ -701,6 +367,8 @@ impl IndexScheduler { } if let Some(finished_at) = finished_at { let db_finished_at = self + .queue + .tasks .finished_at .get(&rtxn, &finished_at.unix_timestamp_nanos()) .unwrap() @@ -708,9 +376,11 @@ impl IndexScheduler { assert!(db_finished_at.contains(task_id)); } if let Some(canceled_by) = canceled_by { - let db_canceled_tasks = self.get_status(&rtxn, Status::Canceled).unwrap(); + let db_canceled_tasks = + self.queue.tasks.get_status(&rtxn, Status::Canceled).unwrap(); assert!(db_canceled_tasks.contains(uid)); - let db_canceling_task = self.get_task(&rtxn, canceled_by).unwrap().unwrap(); + let db_canceling_task = + self.queue.tasks.get_task(&rtxn, canceled_by).unwrap().unwrap(); assert_eq!(db_canceling_task.status, Status::Succeeded); match db_canceling_task.kind { KindWithContent::TaskCancelation { query: _, tasks } => { 
@@ -770,7 +440,9 @@ impl IndexScheduler { Details::IndexInfo { primary_key: pk1 } => match &kind { KindWithContent::IndexCreation { index_uid, primary_key: pk2 } | KindWithContent::IndexUpdate { index_uid, primary_key: pk2 } => { - self.index_tasks + self.queue + .tasks + .index_tasks .get(&rtxn, index_uid.as_str()) .unwrap() .unwrap() @@ -878,23 +550,24 @@ impl IndexScheduler { } } - assert!(self.get_status(&rtxn, status).unwrap().contains(uid)); - assert!(self.get_kind(&rtxn, kind.as_kind()).unwrap().contains(uid)); + assert!(self.queue.tasks.get_status(&rtxn, status).unwrap().contains(uid)); + assert!(self.queue.tasks.get_kind(&rtxn, kind.as_kind()).unwrap().contains(uid)); if let KindWithContent::DocumentAdditionOrUpdate { content_file, .. } = kind { match status { Status::Enqueued | Status::Processing => { assert!(self - .file_store + .queue.file_store .all_uuids() .unwrap() .any(|uuid| uuid.as_ref().unwrap() == &content_file), "Could not find uuid `{content_file}` in the file_store. 
Available uuids are {:?}.", - self.file_store.all_uuids().unwrap().collect::, file_store::Error>>().unwrap(), + self.queue.file_store.all_uuids().unwrap().collect::, file_store::Error>>().unwrap(), ); } Status::Succeeded | Status::Failed | Status::Canceled => { assert!(self + .queue .file_store .all_uuids() .unwrap() diff --git a/crates/meili-snap/Cargo.toml b/crates/meili-snap/Cargo.toml index aee6b497f..0c48ff824 100644 --- a/crates/meili-snap/Cargo.toml +++ b/crates/meili-snap/Cargo.toml @@ -14,4 +14,4 @@ license.workspace = true # fixed version due to format breakages in v1.40 insta = { version = "=1.39.0", features = ["json", "redactions"] } md5 = "0.7.0" -once_cell = "1.19" +once_cell = "1.20" diff --git a/crates/meilisearch-auth/Cargo.toml b/crates/meilisearch-auth/Cargo.toml index 591a40158..d31effd6e 100644 --- a/crates/meilisearch-auth/Cargo.toml +++ b/crates/meilisearch-auth/Cargo.toml @@ -17,10 +17,10 @@ hmac = "0.12.1" maplit = "1.0.2" meilisearch-types = { path = "../meilisearch-types" } rand = "0.8.5" -roaring = { version = "0.10.7", features = ["serde"] } -serde = { version = "1.0.204", features = ["derive"] } -serde_json = { version = "1.0.120", features = ["preserve_order"] } +roaring = { version = "0.10.10", features = ["serde"] } +serde = { version = "1.0.217", features = ["derive"] } +serde_json = { version = "1.0.135", features = ["preserve_order"] } sha2 = "0.10.8" -thiserror = "1.0.61" -time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] } -uuid = { version = "1.10.0", features = ["serde", "v4"] } +thiserror = "2.0.9" +time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] } +uuid = { version = "1.11.0", features = ["serde", "v4"] } diff --git a/crates/meilisearch-types/Cargo.toml b/crates/meilisearch-types/Cargo.toml index 76d8d11ca..ce36c826b 100644 --- a/crates/meilisearch-types/Cargo.toml +++ b/crates/meilisearch-types/Cargo.toml @@ -11,39 +11,41 @@ 
edition.workspace = true license.workspace = true [dependencies] -actix-web = { version = "4.8.0", default-features = false } -anyhow = "1.0.86" +actix-web = { version = "4.9.0", default-features = false } +anyhow = "1.0.95" bumpalo = "3.16.0" convert_case = "0.6.0" -csv = "1.3.0" -deserr = { version = "0.6.2", features = ["actix-web"] } +csv = "1.3.1" +deserr = { version = "0.6.3", features = ["actix-web"] } either = { version = "1.13.0", features = ["serde"] } enum-iterator = "2.1.0" file-store = { path = "../file-store" } -flate2 = "1.0.30" +flate2 = "1.0.35" fst = "0.4.7" -memmap2 = "0.9.4" +memmap2 = "0.9.5" milli = { path = "../milli" } -bumparaw-collections = "0.1.2" -roaring = { version = "0.10.7", features = ["serde"] } +bumparaw-collections = "0.1.4" +roaring = { version = "0.10.10", features = ["serde"] } rustc-hash = "2.1.0" -serde = { version = "1.0.204", features = ["derive"] } +serde = { version = "1.0.217", features = ["derive"] } serde-cs = "0.2.4" -serde_json = "1.0.120" -tar = "0.4.41" -tempfile = "3.10.1" -thiserror = "1.0.61" -time = { version = "0.3.36", features = [ +serde_json = "1.0.135" +tar = "0.4.43" +tempfile = "3.15.0" +thiserror = "2.0.9" +time = { version = "0.3.37", features = [ "serde-well-known", "formatting", "parsing", "macros", ] } -tokio = "1.38" -uuid = { version = "1.10.0", features = ["serde", "v4"] } +tokio = "1.42" +utoipa = { version = "5.3.1", features = ["macros"] } +uuid = { version = "1.11.0", features = ["serde", "v4"] } [dev-dependencies] -insta = "1.39.0" +# fixed version due to format breakages in v1.40 +insta = "=1.39.0" meili-snap = { path = "../meili-snap" } [features] diff --git a/crates/meilisearch-types/src/batch_view.rs b/crates/meilisearch-types/src/batch_view.rs index 08d25413c..112abd1dd 100644 --- a/crates/meilisearch-types/src/batch_view.rs +++ b/crates/meilisearch-types/src/batch_view.rs @@ -1,13 +1,15 @@ use milli::progress::ProgressView; use serde::Serialize; use time::{Duration, OffsetDateTime}; 
+use utoipa::ToSchema; use crate::batches::{Batch, BatchId, BatchStats}; use crate::task_view::DetailsView; use crate::tasks::serialize_duration; -#[derive(Debug, Clone, Serialize)] +#[derive(Debug, Clone, Serialize, ToSchema)] #[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] pub struct BatchView { pub uid: BatchId, pub progress: Option, diff --git a/crates/meilisearch-types/src/batches.rs b/crates/meilisearch-types/src/batches.rs index 664dafa7a..7910a5af4 100644 --- a/crates/meilisearch-types/src/batches.rs +++ b/crates/meilisearch-types/src/batches.rs @@ -3,6 +3,7 @@ use std::collections::BTreeMap; use milli::progress::ProgressView; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; +use utoipa::ToSchema; use crate::task_view::DetailsView; use crate::tasks::{Kind, Status}; @@ -25,8 +26,9 @@ pub struct Batch { pub finished_at: Option, } -#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[derive(Default, Debug, Clone, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] pub struct BatchStats { pub total_nb_tasks: BatchId, pub status: BTreeMap, diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index 0c4027899..a864f8aae 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -7,17 +7,25 @@ use aweb::rt::task::JoinError; use convert_case::Casing; use milli::heed::{Error as HeedError, MdbError}; use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, ToSchema)] #[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] pub struct ResponseError { #[serde(skip)] pub code: StatusCode, + /// The error message. pub message: String, + /// The error code. + #[schema(value_type = Code)] #[serde(rename = "code")] error_code: String, + /// The error type. 
+ #[schema(value_type = ErrorType)] #[serde(rename = "type")] error_type: String, + /// A link to the documentation about this specific error. #[serde(rename = "link")] error_link: String, } @@ -97,7 +105,9 @@ pub trait ErrorCode { } #[allow(clippy::enum_variant_names)] -enum ErrorType { +#[derive(ToSchema)] +#[schema(rename_all = "snake_case")] +pub enum ErrorType { Internal, InvalidRequest, Auth, @@ -129,7 +139,8 @@ impl fmt::Display for ErrorType { /// `MyErrorCode::default().error_code()`. macro_rules! make_error_codes { ($($code_ident:ident, $err_type:ident, $status:ident);*) => { - #[derive(Debug, Clone, Copy, PartialEq, Eq)] + #[derive(Debug, Clone, Copy, PartialEq, Eq, ToSchema)] + #[schema(rename_all = "snake_case")] pub enum Code { $($code_ident),* } diff --git a/crates/meilisearch-types/src/facet_values_sort.rs b/crates/meilisearch-types/src/facet_values_sort.rs index 278061f19..8e0dd2ca4 100644 --- a/crates/meilisearch-types/src/facet_values_sort.rs +++ b/crates/meilisearch-types/src/facet_values_sort.rs @@ -1,8 +1,9 @@ use deserr::Deserr; use milli::OrderBy; use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; -#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Deserr)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Deserr, ToSchema)] #[serde(rename_all = "camelCase")] #[deserr(rename_all = camelCase)] pub enum FacetValuesSort { diff --git a/crates/meilisearch-types/src/index_uid.rs b/crates/meilisearch-types/src/index_uid.rs index 03a31a82f..4bf126794 100644 --- a/crates/meilisearch-types/src/index_uid.rs +++ b/crates/meilisearch-types/src/index_uid.rs @@ -4,13 +4,15 @@ use std::fmt; use std::str::FromStr; use deserr::Deserr; +use utoipa::ToSchema; use crate::error::{Code, ErrorCode}; /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400 /// bytes long -#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord)] +#[derive(Debug, Clone, 
PartialEq, Eq, Deserr, PartialOrd, Ord, ToSchema)] #[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)] +#[schema(value_type = String, example = "movies")] pub struct IndexUid(String); impl IndexUid { diff --git a/crates/meilisearch-types/src/keys.rs b/crates/meilisearch-types/src/keys.rs index f7d80bbcb..8fcbab14d 100644 --- a/crates/meilisearch-types/src/keys.rs +++ b/crates/meilisearch-types/src/keys.rs @@ -9,6 +9,7 @@ use serde::{Deserialize, Serialize}; use time::format_description::well_known::Rfc3339; use time::macros::{format_description, time}; use time::{Date, OffsetDateTime, PrimitiveDateTime}; +use utoipa::ToSchema; use uuid::Uuid; use crate::deserr::{immutable_field_error, DeserrError, DeserrJsonError}; @@ -32,19 +33,31 @@ impl MergeWithError for Dese } } -#[derive(Debug, Deserr)] +#[derive(Debug, Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[schema(rename_all = "camelCase")] pub struct CreateApiKey { + /// A description for the key. `null` if empty. + #[schema(example = json!(null))] #[deserr(default, error = DeserrJsonError)] pub description: Option, + /// A human-readable name for the key. `null` if empty. + #[schema(example = "Indexing Products API key")] #[deserr(default, error = DeserrJsonError)] pub name: Option, + /// A uuid v4 to identify the API Key. If not specified, it's generated by Meilisearch. + #[schema(value_type = Uuid, example = json!(null))] #[deserr(default = Uuid::new_v4(), error = DeserrJsonError, try_from(&String) = Uuid::from_str -> uuid::Error)] pub uid: KeyId, + /// A list of actions permitted for the key. `["*"]` for all actions. The `*` character can be used as a wildcard when located at the last position. e.g. `documents.*` to authorize access on all documents endpoints. 
+ #[schema(example = json!(["documents.add"]))] #[deserr(error = DeserrJsonError, missing_field_error = DeserrJsonError::missing_api_key_actions)] pub actions: Vec, + /// A list of accesible indexes permitted for the key. `["*"]` for all indexes. The `*` character can be used as a wildcard when located at the last position. e.g. `products_*` to allow access to all indexes whose names start with `products_`. #[deserr(error = DeserrJsonError, missing_field_error = DeserrJsonError::missing_api_key_indexes)] + #[schema(value_type = Vec, example = json!(["products"]))] pub indexes: Vec, + /// Represent the expiration date and time as RFC 3339 format. `null` equals to no expiration time. #[deserr(error = DeserrJsonError, try_from(Option) = parse_expiration_date -> ParseOffsetDateTimeError, missing_field_error = DeserrJsonError::missing_api_key_expires_at)] pub expires_at: Option, } @@ -86,12 +99,15 @@ fn deny_immutable_fields_api_key( } } -#[derive(Debug, Deserr)] +#[derive(Debug, Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields = deny_immutable_fields_api_key)] +#[schema(rename_all = "camelCase")] pub struct PatchApiKey { #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option, example = "This key is used to update documents in the products index")] pub description: Setting, #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option, example = "Indexing Products API key")] pub name: Setting, } @@ -179,7 +195,9 @@ fn parse_expiration_date( } } -#[derive(Copy, Clone, Serialize, Deserialize, Debug, Eq, PartialEq, Hash, Sequence, Deserr)] +#[derive( + Copy, Clone, Serialize, Deserialize, Debug, Eq, PartialEq, Hash, Sequence, Deserr, ToSchema, +)] #[repr(u8)] pub enum Action { #[serde(rename = "*")] diff --git a/crates/meilisearch-types/src/locales.rs b/crates/meilisearch-types/src/locales.rs index 8d746779e..945c38cc3 100644 --- a/crates/meilisearch-types/src/locales.rs +++ 
b/crates/meilisearch-types/src/locales.rs @@ -1,8 +1,9 @@ use deserr::Deserr; use milli::LocalizedAttributesRule; use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; -#[derive(Debug, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, ToSchema)] #[deserr(rename_all = camelCase)] #[serde(rename_all = "camelCase")] pub struct LocalizedAttributesRuleView { @@ -33,7 +34,7 @@ impl From for LocalizedAttributesRule { /// this enum implements `Deserr` in order to be used in the API. macro_rules! make_locale { ($(($iso_639_1:ident, $iso_639_1_str:expr) => ($iso_639_3:ident, $iso_639_3_str:expr),)+) => { - #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)] + #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd, ToSchema)] #[deserr(rename_all = camelCase)] #[serde(rename_all = "camelCase")] pub enum Locale { diff --git a/crates/meilisearch-types/src/settings.rs b/crates/meilisearch-types/src/settings.rs index b12dfc9a2..658d7eec4 100644 --- a/crates/meilisearch-types/src/settings.rs +++ b/crates/meilisearch-types/src/settings.rs @@ -13,6 +13,7 @@ use milli::proximity::ProximityPrecision; use milli::update::Setting; use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET}; use serde::{Deserialize, Serialize, Serializer}; +use utoipa::ToSchema; use crate::deserr::DeserrJsonError; use crate::error::deserr_codes::*; @@ -39,10 +40,10 @@ where .serialize(s) } -#[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)] +#[derive(Clone, Default, Debug, Serialize, PartialEq, Eq, ToSchema)] pub struct Checked; -#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq, ToSchema)] pub struct Unchecked; impl Deserr for Unchecked @@ -69,54 +70,63 @@ fn validate_min_word_size_for_typo_setting( Ok(s) } -#[derive(Debug, Clone, 
Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] #[serde(deny_unknown_fields, rename_all = "camelCase")] #[deserr(deny_unknown_fields, rename_all = camelCase, validate = validate_min_word_size_for_typo_setting -> DeserrJsonError)] pub struct MinWordSizeTyposSetting { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option, example = json!(5))] pub one_typo: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option, example = json!(9))] pub two_typos: Setting, } -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] #[serde(deny_unknown_fields, rename_all = "camelCase")] #[deserr(deny_unknown_fields, rename_all = camelCase, where_predicate = __Deserr_E: deserr::MergeWithError>)] pub struct TypoSettings { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option, example = json!(true))] pub enabled: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option, example = json!({ "oneTypo": 5, "twoTypo": 9 }))] pub min_word_size_for_typos: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option>, example = json!(["iPhone", "phone"]))] pub disable_on_words: Setting>, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option>, example = json!(["uuid", "url"]))] pub disable_on_attributes: Setting>, } -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] 
#[serde(deny_unknown_fields, rename_all = "camelCase")] #[deserr(rename_all = camelCase, deny_unknown_fields)] pub struct FacetingSettings { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option, example = json!(10))] pub max_values_per_facet: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option>, example = json!({ "genre": FacetValuesSort::Count }))] pub sort_facet_values_by: Setting>, } -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] #[serde(deny_unknown_fields, rename_all = "camelCase")] #[deserr(rename_all = camelCase, deny_unknown_fields)] pub struct PaginationSettings { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option, example = json!(250))] pub max_total_hits: Setting, } @@ -134,79 +144,149 @@ impl MergeWithError for DeserrJsonError)] + pub inner: Setting, +} + +impl fmt::Debug for SettingEmbeddingSettings { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.inner.fmt(f) + } +} + +impl Deserr for SettingEmbeddingSettings { + fn deserialize_from_value( + value: deserr::Value, + location: ValuePointerRef, + ) -> Result { + Setting::::deserialize_from_value( + value, location, + ) + .map(|inner| Self { inner }) + } +} + /// Holds all the settings for an index. `T` can either be `Checked` if they represents settings /// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a /// call to `check` will return a `Settings` from a `Settings`. 
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] #[serde( deny_unknown_fields, rename_all = "camelCase", bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>") )] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[schema(rename_all = "camelCase")] pub struct Settings { + /// Fields displayed in the returned documents. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option>, example = json!(["id", "title", "description", "url"]))] pub displayed_attributes: WildcardSetting, - + /// Fields in which to search for matching query words sorted by order of importance. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option>, example = json!(["title", "description"]))] pub searchable_attributes: WildcardSetting, - + /// Attributes to use for faceting and filtering. See [Filtering and Faceted Search](https://www.meilisearch.com/docs/learn/filtering_and_sorting/search_with_facet_filters). #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option>, example = json!(["release_date", "genre"]))] pub filterable_attributes: Setting>, + /// Attributes to use when sorting search results. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option>, example = json!(["release_date"]))] pub sortable_attributes: Setting>, + /// List of ranking rules sorted by order of importance. The order is customizable. + /// [A list of ordered built-in ranking rules](https://www.meilisearch.com/docs/learn/relevancy/relevancy). 
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option>, example = json!([RankingRuleView::Words, RankingRuleView::Typo, RankingRuleView::Proximity, RankingRuleView::Attribute, RankingRuleView::Exactness]))] pub ranking_rules: Setting>, + /// List of words ignored when present in search queries. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option>, example = json!(["the", "a", "them", "their"]))] pub stop_words: Setting>, + /// List of characters not delimiting where one term begins and ends. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option>, example = json!([" ", "\n"]))] pub non_separator_tokens: Setting>, + /// List of characters delimiting where one term begins and ends. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option>, example = json!(["S"]))] pub separator_tokens: Setting>, + /// List of strings Meilisearch should parse as a single term. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option>, example = json!(["iPhone pro"]))] pub dictionary: Setting>, + /// List of associated words treated similarly. Each word is associated with an array of words used as its synonyms. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option>>, example = json!({ "he": ["she", "they", "them"], "phone": ["iPhone", "android"]}))] pub synonyms: Setting>>, + /// Search returns documents with distinct (different) values of the given field.
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option, example = json!("sku"))] pub distinct_attribute: Setting, + /// Precision level when calculating the proximity ranking rule. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option, example = json!(ProximityPrecisionView::ByAttribute))] pub proximity_precision: Setting, + /// Customize typo tolerance feature. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option, example = json!({ "enabled": true, "disableOnAttributes": ["title"]}))] pub typo_tolerance: Setting, + /// Faceting settings. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option, example = json!({ "maxValuesPerFacet": 10, "sortFacetValuesBy": { "genre": FacetValuesSort::Count }}))] pub faceting: Setting, + /// Pagination settings. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option, example = json!({ "maxValuesPerFacet": 10, "sortFacetValuesBy": { "genre": FacetValuesSort::Count }}))] pub pagination: Setting, + /// Embedder required for performing semantic search queries. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] - pub embedders: Setting>>, + #[schema(value_type = Option>)] + pub embedders: Setting>, + /// Maximum duration of a search query. 
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option, example = json!(50))] pub search_cutoff_ms: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option>, example = json!(50))] pub localized_attributes: Setting>, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option, example = json!(true))] pub facet_search: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option, example = json!("Hemlo"))] pub prefix_search: Setting, #[serde(skip)] @@ -221,7 +301,7 @@ impl Settings { }; for mut embedder in embedders.values_mut() { - let Setting::Set(embedder) = &mut embedder else { + let SettingEmbeddingSettings { inner: Setting::Set(embedder) } = &mut embedder else { continue; }; @@ -386,8 +466,9 @@ impl Settings { let Setting::Set(mut configs) = self.embedders else { return Ok(self) }; for (name, config) in configs.iter_mut() { let config_to_check = std::mem::take(config); - let checked_config = milli::update::validate_embedding_settings(config_to_check, name)?; - *config = checked_config + let checked_config = + milli::update::validate_embedding_settings(config_to_check.inner, name)?; + *config = SettingEmbeddingSettings { inner: checked_config }; } self.embedders = Setting::Set(configs); Ok(self) @@ -665,7 +746,9 @@ pub fn apply_settings_to_builder( } match embedders { - Setting::Set(value) => builder.set_embedder_settings(value.clone()), + Setting::Set(value) => builder.set_embedder_settings( + value.iter().map(|(k, v)| (k.clone(), v.inner.clone())).collect(), + ), Setting::Reset => builder.reset_embedder_settings(), Setting::NotSet => (), } @@ -779,7 +862,9 @@ pub fn settings( let embedders: BTreeMap<_, _> = index 
.embedding_configs(rtxn)? .into_iter() - .map(|IndexEmbeddingConfig { name, config, .. }| (name, Setting::Set(config.into()))) + .map(|IndexEmbeddingConfig { name, config, .. }| { + (name, SettingEmbeddingSettings { inner: Setting::Set(config.into()) }) + }) .collect(); let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) }; @@ -838,7 +923,7 @@ pub fn settings( Ok(settings) } -#[derive(Debug, Clone, PartialEq, Eq, Deserr)] +#[derive(Debug, Clone, PartialEq, Eq, Deserr, ToSchema)] #[deserr(try_from(&String) = FromStr::from_str -> CriterionError)] pub enum RankingRuleView { /// Sorted by decreasing number of matched query terms. @@ -934,7 +1019,7 @@ impl From for Criterion { } } -#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)] +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize, ToSchema)] #[serde(deny_unknown_fields, rename_all = "camelCase")] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub enum ProximityPrecisionView { @@ -1001,8 +1086,9 @@ impl std::ops::Deref for WildcardSetting { } } -#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)] +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize, ToSchema)] #[serde(deny_unknown_fields, rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub enum PrefixSearchSettings { #[default] diff --git a/crates/meilisearch-types/src/star_or.rs b/crates/meilisearch-types/src/star_or.rs index cd26a1fb0..1070b99ff 100644 --- a/crates/meilisearch-types/src/star_or.rs +++ b/crates/meilisearch-types/src/star_or.rs @@ -6,6 +6,7 @@ use std::str::FromStr; use deserr::{DeserializeError, Deserr, MergeWithError, ValueKind}; use serde::de::Visitor; use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use utoipa::PartialSchema; use 
crate::deserr::query_params::FromQueryParameter; @@ -229,7 +230,7 @@ pub enum OptionStarOrList { List(Vec), } -impl OptionStarOrList { +impl OptionStarOrList { pub fn is_some(&self) -> bool { match self { Self::None => false, diff --git a/crates/meilisearch-types/src/task_view.rs b/crates/meilisearch-types/src/task_view.rs index 64dbd58f7..6032843aa 100644 --- a/crates/meilisearch-types/src/task_view.rs +++ b/crates/meilisearch-types/src/task_view.rs @@ -1,32 +1,49 @@ use milli::Object; use serde::{Deserialize, Serialize}; use time::{Duration, OffsetDateTime}; +use utoipa::ToSchema; use crate::batches::BatchId; use crate::error::ResponseError; use crate::settings::{Settings, Unchecked}; use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId}; -#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, ToSchema)] #[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] pub struct TaskView { + /// The unique sequential identifier of the task. + #[schema(value_type = u32, example = 4312)] pub uid: TaskId, + /// The unique identifier of the index where this task is operated. + #[schema(value_type = Option, example = json!("movies"))] pub batch_uid: Option, #[serde(default)] pub index_uid: Option, pub status: Status, + /// The type of the task. #[serde(rename = "type")] pub kind: Kind, + /// The uid of the task that performed the taskCancelation if the task has been canceled. + #[schema(value_type = Option, example = json!(4326))] pub canceled_by: Option, #[serde(skip_serializing_if = "Option::is_none")] pub details: Option, pub error: Option, + /// Total elapsed time the engine was in processing state expressed as an `ISO-8601` duration format. + #[schema(value_type = Option, example = json!(null))] #[serde(serialize_with = "serialize_duration", default)] pub duration: Option, + /// An `RFC 3339` format for date/time/duration.
+ #[schema(value_type = String, example = json!("2024-08-08_14:12:09.393Z"))] #[serde(with = "time::serde::rfc3339")] pub enqueued_at: OffsetDateTime, + /// An `RFC 3339` format for date/time/duration. + #[schema(value_type = String, example = json!("2024-08-08_14:12:09.393Z"))] #[serde(with = "time::serde::rfc3339::option", default)] pub started_at: Option, + /// An `RFC 3339` format for date/time/duration. + #[schema(value_type = String, example = json!("2024-08-08_14:12:09.393Z"))] #[serde(with = "time::serde::rfc3339::option", default)] pub finished_at: Option, } @@ -50,35 +67,48 @@ impl TaskView { } } -#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] +#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] pub struct DetailsView { + /// Number of documents received for documentAdditionOrUpdate task. #[serde(skip_serializing_if = "Option::is_none")] pub received_documents: Option, + /// Number of documents finally indexed for documentAdditionOrUpdate task or a documentAdditionOrUpdate batch of tasks. #[serde(skip_serializing_if = "Option::is_none")] pub indexed_documents: Option>, + /// Number of documents edited for editDocumentByFunction task. #[serde(skip_serializing_if = "Option::is_none")] pub edited_documents: Option>, + /// Value for the primaryKey field encountered if any for indexCreation or indexUpdate task. #[serde(skip_serializing_if = "Option::is_none")] pub primary_key: Option>, + /// Number of provided document ids for the documentDeletion task. #[serde(skip_serializing_if = "Option::is_none")] pub provided_ids: Option, + /// Number of documents finally deleted for documentDeletion and indexDeletion tasks. #[serde(skip_serializing_if = "Option::is_none")] pub deleted_documents: Option>, + /// Number of tasks that match the request for taskCancelation or taskDeletion tasks. 
#[serde(skip_serializing_if = "Option::is_none")] pub matched_tasks: Option, + /// Number of tasks canceled for taskCancelation. #[serde(skip_serializing_if = "Option::is_none")] pub canceled_tasks: Option>, + /// Number of tasks deleted for taskDeletion. #[serde(skip_serializing_if = "Option::is_none")] pub deleted_tasks: Option>, + /// Original filter query for taskCancelation or taskDeletion tasks. #[serde(skip_serializing_if = "Option::is_none")] pub original_filter: Option>, + /// Identifier generated for the dump for dumpCreation task. #[serde(skip_serializing_if = "Option::is_none")] pub dump_uid: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub context: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub function: Option, + /// [Learn more about the settings in this guide](https://www.meilisearch.com/docs/reference/api/settings). #[serde(skip_serializing_if = "Option::is_none")] #[serde(flatten)] pub settings: Option>>, diff --git a/crates/meilisearch-types/src/tasks.rs b/crates/meilisearch-types/src/tasks.rs index c62f550ae..167cfcd80 100644 --- a/crates/meilisearch-types/src/tasks.rs +++ b/crates/meilisearch-types/src/tasks.rs @@ -9,6 +9,7 @@ use milli::Object; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize, Serializer}; use time::{Duration, OffsetDateTime}; +use utoipa::ToSchema; use uuid::Uuid; use crate::batches::BatchId; @@ -151,7 +152,7 @@ pub enum KindWithContent { SnapshotCreation, } -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct IndexSwap { pub indexes: (String, String), @@ -363,9 +364,22 @@ impl From<&KindWithContent> for Option
{ } } +/// The status of a task. #[derive( - Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Sequence, PartialOrd, Ord, + Debug, + Clone, + Copy, + PartialEq, + Eq, + Hash, + Serialize, + Deserialize, + Sequence, + PartialOrd, + Ord, + ToSchema, )] +#[schema(example = json!(Status::Processing))] #[serde(rename_all = "camelCase")] pub enum Status { Enqueued, @@ -424,10 +438,23 @@ impl fmt::Display for ParseTaskStatusError { } impl std::error::Error for ParseTaskStatusError {} +/// The type of the task. #[derive( - Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Sequence, PartialOrd, Ord, + Debug, + Clone, + Copy, + PartialEq, + Eq, + Hash, + Serialize, + Deserialize, + Sequence, + PartialOrd, + Ord, + ToSchema, )] #[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase", example = json!(enum_iterator::all::().collect::>()))] pub enum Kind { DocumentAdditionOrUpdate, DocumentEdition, diff --git a/crates/meilisearch/Cargo.toml b/crates/meilisearch/Cargo.toml index 68ca8e136..1d458af34 100644 --- a/crates/meilisearch/Cargo.toml +++ b/crates/meilisearch/Cargo.toml @@ -14,42 +14,42 @@ default-run = "meilisearch" [dependencies] actix-cors = "0.7.0" -actix-http = { version = "3.8.0", default-features = false, features = [ +actix-http = { version = "3.9.0", default-features = false, features = [ "compress-brotli", "compress-gzip", "rustls-0_23", ] } actix-utils = "3.0.1" -actix-web = { version = "4.8.0", default-features = false, features = [ +actix-web = { version = "4.9.0", default-features = false, features = [ "macros", "compress-brotli", "compress-gzip", "cookies", "rustls-0_23", ] } -anyhow = { version = "1.0.86", features = ["backtrace"] } -async-trait = "0.1.81" -bstr = "1.9.1" -byte-unit = { version = "5.1.4", default-features = false, features = [ +anyhow = { version = "1.0.95", features = ["backtrace"] } +async-trait = "0.1.85" +bstr = "1.11.3" +byte-unit = { version = "5.1.6", default-features = false, features = [ 
"std", "byte", "serde", ] } -bytes = "1.6.0" -clap = { version = "4.5.9", features = ["derive", "env"] } -crossbeam-channel = "0.5.13" -deserr = { version = "0.6.2", features = ["actix-web"] } +bytes = "1.9.0" +clap = { version = "4.5.24", features = ["derive", "env"] } +crossbeam-channel = "0.5.14" +deserr = { version = "0.6.3", features = ["actix-web"] } dump = { path = "../dump" } either = "1.13.0" file-store = { path = "../file-store" } -flate2 = "1.0.30" +flate2 = "1.0.35" fst = "0.4.7" -futures = "0.3.30" -futures-util = "0.3.30" +futures = "0.3.31" +futures-util = "0.3.31" index-scheduler = { path = "../index-scheduler" } -indexmap = { version = "2.2.6", features = ["serde"] } -is-terminal = "0.4.12" -itertools = "0.13.0" +indexmap = { version = "2.7.0", features = ["serde"] } +is-terminal = "0.4.13" +itertools = "0.14.0" jsonwebtoken = "9.3.0" lazy_static = "1.5.0" meilisearch-auth = { path = "../meilisearch-auth" } @@ -58,81 +58,85 @@ mimalloc = { version = "0.1.43", default-features = false } mime = "0.3.17" num_cpus = "1.16.0" obkv = "0.3.0" -once_cell = "1.19.0" -ordered-float = "4.2.1" +once_cell = "1.20.2" +ordered-float = "4.6.0" parking_lot = "0.12.3" permissive-json-pointer = { path = "../permissive-json-pointer" } -pin-project-lite = "0.2.14" +pin-project-lite = "0.2.16" platform-dirs = "0.3.0" prometheus = { version = "0.13.4", features = ["process"] } rand = "0.8.5" rayon = "1.10.0" -regex = "1.10.5" -reqwest = { version = "0.12.5", features = [ +regex = "1.11.1" +reqwest = { version = "0.12.12", features = [ "rustls-tls", "json", ], default-features = false } -rustls = { version = "0.23.11", features = ["ring"], default-features = false } -rustls-pki-types = { version = "1.7.0", features = ["alloc"] } -rustls-pemfile = "2.1.2" -segment = { version = "0.2.4" } -serde = { version = "1.0.204", features = ["derive"] } -serde_json = { version = "1.0.120", features = ["preserve_order"] } +rustls = { version = "0.23.20", features = ["ring"], 
default-features = false } +rustls-pki-types = { version = "1.10.1", features = ["alloc"] } +rustls-pemfile = "2.2.0" +segment = { version = "0.2.5" } +serde = { version = "1.0.217", features = ["derive"] } +serde_json = { version = "1.0.135", features = ["preserve_order"] } sha2 = "0.10.8" siphasher = "1.0.1" slice-group-by = "0.3.1" static-files = { version = "0.2.4", optional = true } -sysinfo = "0.30.13" -tar = "0.4.41" -tempfile = "3.10.1" -thiserror = "1.0.61" -time = { version = "0.3.36", features = [ +sysinfo = "0.33.1" +tar = "0.4.43" +tempfile = "3.15.0" +thiserror = "2.0.9" +time = { version = "0.3.37", features = [ "serde-well-known", "formatting", "parsing", "macros", ] } -tokio = { version = "1.38.0", features = ["full"] } -toml = "0.8.14" -uuid = { version = "1.10.0", features = ["serde", "v4"] } +tokio = { version = "1.42.0", features = ["full"] } +toml = "0.8.19" +uuid = { version = "1.11.0", features = ["serde", "v4"] } serde_urlencoded = "0.7.1" termcolor = "1.4.1" -url = { version = "2.5.2", features = ["serde"] } -tracing = "0.1.40" -tracing-subscriber = { version = "0.3.18", features = ["json"] } +url = { version = "2.5.4", features = ["serde"] } +tracing = "0.1.41" +tracing-subscriber = { version = "0.3.19", features = ["json"] } tracing-trace = { version = "0.1.0", path = "../tracing-trace" } -tracing-actix-web = "0.7.11" +tracing-actix-web = "0.7.15" build-info = { version = "1.7.0", path = "../build-info" } -roaring = "0.10.7" +roaring = "0.10.10" mopa-maintained = "0.2.3" +utoipa = { version = "5.3.1", features = ["actix_extras", "macros", "non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] } +utoipa-scalar = { version = "0.2.1", optional = true, features = ["actix-web"] } [dev-dependencies] actix-rt = "2.10.0" brotli = "6.0.0" -insta = "1.39.0" +# fixed version due to format breakages in v1.40 +insta = "=1.39.0" manifest-dir-macros = "0.1.18" maplit = "1.0.2" meili-snap = { path = "../meili-snap" } temp-env = 
"0.3.6" urlencoding = "2.1.3" -wiremock = "0.6.0" +wiremock = "0.6.2" yaup = "0.3.1" [build-dependencies] -anyhow = { version = "1.0.86", optional = true } -cargo_toml = { version = "0.20.3", optional = true } +anyhow = { version = "1.0.95", optional = true } +cargo_toml = { version = "0.21.0", optional = true } hex = { version = "0.4.3", optional = true } -reqwest = { version = "0.12.5", features = [ +reqwest = { version = "0.12.12", features = [ "blocking", "rustls-tls", ], default-features = false, optional = true } sha-1 = { version = "0.10.1", optional = true } static-files = { version = "0.2.4", optional = true } -tempfile = { version = "3.10.1", optional = true } -zip = { version = "2.1.3", optional = true } +tempfile = { version = "3.15.0", optional = true } +zip = { version = "2.2.2", optional = true } [features] default = ["meilisearch-types/all-tokenizations", "mini-dashboard"] +swagger = ["utoipa-scalar"] mini-dashboard = [ "static-files", "anyhow", diff --git a/crates/meilisearch/src/analytics/segment_analytics.rs b/crates/meilisearch/src/analytics/segment_analytics.rs index 7dc746b14..646bff532 100644 --- a/crates/meilisearch/src/analytics/segment_analytics.rs +++ b/crates/meilisearch/src/analytics/segment_analytics.rs @@ -194,6 +194,7 @@ struct Infos { experimental_enable_logs_route: bool, experimental_reduce_indexing_memory_usage: bool, experimental_max_number_of_batched_tasks: usize, + experimental_limit_batched_tasks_total_size: u64, gpu_enabled: bool, db_path: bool, import_dump: bool, @@ -239,6 +240,7 @@ impl Infos { experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, experimental_max_number_of_batched_tasks, + experimental_limit_batched_tasks_total_size, http_addr, master_key: _, env, @@ -314,6 +316,7 @@ impl Infos { http_addr: http_addr != default_http_addr(), http_payload_size_limit, experimental_max_number_of_batched_tasks, + experimental_limit_batched_tasks_total_size, task_queue_webhook: task_webhook_url.is_some(), 
task_webhook_authorization_header: task_webhook_authorization_header.is_some(), log_level: log_level.to_string(), @@ -426,13 +429,9 @@ impl Segment { &AuthFilter::default(), ) { // Replace the version number with the prototype name if any. - let version = if let Some(prototype) = build_info::DescribeResult::from_build() + let version = build_info::DescribeResult::from_build() .and_then(|describe| describe.as_prototype()) - { - prototype - } else { - env!("CARGO_PKG_VERSION") - }; + .unwrap_or(env!("CARGO_PKG_VERSION")); let _ = self .batcher diff --git a/crates/meilisearch/src/error.rs b/crates/meilisearch/src/error.rs index 41d62507a..b13eb8d7c 100644 --- a/crates/meilisearch/src/error.rs +++ b/crates/meilisearch/src/error.rs @@ -19,15 +19,15 @@ pub enum MeilisearchHttpError { #[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")] CsvDelimiterWithWrongContentType(String), #[error( - "The Content-Type `{0}` is invalid. Accepted values for the Content-Type header are: {}", - .1.iter().map(|s| format!("`{}`", s)).collect::>().join(", ") + "The Content-Type `{}` is invalid. 
Accepted values for the Content-Type header are: {}", + .0, .1.iter().map(|s| format!("`{}`", s)).collect::>().join(", ") )] InvalidContentType(String, Vec), #[error("Document `{0}` not found.")] DocumentNotFound(String), #[error("Sending an empty filter is forbidden.")] EmptyFilter, - #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] + #[error("Invalid syntax for the filter parameter: `expected {}, found: {}`.", .0.join(", "), .1)] InvalidExpression(&'static [&'static str], Value), #[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")] FederationOptionsInNonFederatedRequest(usize), diff --git a/crates/meilisearch/src/lib.rs b/crates/meilisearch/src/lib.rs index 9e6e45836..a8b8b8eba 100644 --- a/crates/meilisearch/src/lib.rs +++ b/crates/meilisearch/src/lib.rs @@ -188,13 +188,13 @@ impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger { if let Some(error) = response.response().error() { // use the status code already constructed for the outgoing HTTP response - span.record("error", &tracing::field::display(error.as_response_error())); + span.record("error", tracing::field::display(error.as_response_error())); } } Err(error) => { let code: i32 = error.error_response().status().as_u16().into(); span.record("status_code", code); - span.record("error", &tracing::field::display(error.as_response_error())); + span.record("error", tracing::field::display(error.as_response_error())); } }; } @@ -307,11 +307,12 @@ fn open_or_create_database_unchecked( task_db_size: opt.max_task_db_size.as_u64() as usize, index_base_map_size: opt.max_index_size.as_u64() as usize, enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage, - indexer_config: (&opt.indexer_options).try_into()?, + indexer_config: Arc::new((&opt.indexer_options).try_into()?), autobatching_enabled: true, cleanup_enabled: 
!opt.experimental_replication_parameters, max_number_of_tasks: 1_000_000, max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks, + batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size, index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize, index_count: DEFAULT_INDEX_COUNT, instance_features, diff --git a/crates/meilisearch/src/option.rs b/crates/meilisearch/src/option.rs index 33a8a2f71..b5aa6b9e7 100644 --- a/crates/meilisearch/src/option.rs +++ b/crates/meilisearch/src/option.rs @@ -60,6 +60,8 @@ const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE"; const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str = "MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS"; +const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str = + "MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_SIZE"; const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml"; const DEFAULT_DB_PATH: &str = "./data.ms"; @@ -200,20 +202,23 @@ pub struct Opt { #[clap(long, env = MEILI_TASK_WEBHOOK_URL)] pub task_webhook_url: Option, - /// The Authorization header to send on the webhook URL whenever a task finishes so a third party can be notified. + /// The Authorization header to send on the webhook URL whenever + /// a task finishes so a third party can be notified. #[clap(long, env = MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER)] pub task_webhook_authorization_header: Option, /// Deactivates Meilisearch's built-in telemetry when provided. /// - /// Meilisearch automatically collects data from all instances that do not opt out using this flag. - /// All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted + /// Meilisearch automatically collects data from all instances that + /// do not opt out using this flag. All gathered data is used solely + /// for the purpose of improving Meilisearch, and can be deleted /// at any time. 
#[serde(default)] // we can't send true #[clap(long, env = MEILI_NO_ANALYTICS)] pub no_analytics: bool, - /// Sets the maximum size of the index. Value must be given in bytes or explicitly stating a base unit (for instance: 107374182400, '107.7Gb', or '107374 Mb'). + /// Sets the maximum size of the index. Value must be given in bytes or explicitly + /// stating a base unit (for instance: 107374182400, '107.7Gb', or '107374 Mb'). #[clap(skip = default_max_index_size())] #[serde(skip, default = "default_max_index_size")] pub max_index_size: Byte, @@ -333,43 +338,53 @@ pub struct Opt { /// Defines how much detail should be present in Meilisearch's logs. /// - /// Meilisearch currently supports six log levels, listed in order of increasing verbosity: OFF, ERROR, WARN, INFO, DEBUG, TRACE. + /// Meilisearch currently supports six log levels, listed in order of + /// increasing verbosity: OFF, ERROR, WARN, INFO, DEBUG, TRACE. #[clap(long, env = MEILI_LOG_LEVEL, default_value_t)] #[serde(default)] pub log_level: LogLevel, - /// Experimental contains filter feature. For more information, see: + /// Experimental contains filter feature. For more information, + /// see: /// /// Enables the experimental contains filter operator. #[clap(long, env = MEILI_EXPERIMENTAL_CONTAINS_FILTER)] #[serde(default)] pub experimental_contains_filter: bool, - /// Experimental metrics feature. For more information, see: + /// Experimental metrics feature. For more information, + /// see: /// /// Enables the Prometheus metrics on the `GET /metrics` endpoint. #[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_METRICS)] #[serde(default)] pub experimental_enable_metrics: bool, - /// Experimental search queue size. For more information, see: + /// Experimental search queue size. For more information, + /// see: + /// + /// Lets you customize the size of the search queue. 
Meilisearch processes + /// your search requests as fast as possible but once the queue is full + /// it starts returning HTTP 503, Service Unavailable. /// - /// Lets you customize the size of the search queue. Meilisearch processes your search requests as fast as possible but once the - /// queue is full it starts returning HTTP 503, Service Unavailable. /// The default value is 1000. #[clap(long, env = MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE, default_value_t = default_experimental_search_queue_size())] #[serde(default = "default_experimental_search_queue_size")] pub experimental_search_queue_size: usize, - /// Experimental drop search after. For more information, see: + /// Experimental drop search after. For more information, + /// see: + /// + /// Let you customize after how many seconds Meilisearch should consider + /// a search request irrelevant and drop it. /// - /// Let you customize after how many seconds Meilisearch should consider a search request irrelevant and drop it. /// The default value is 60. #[clap(long, env = MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER, default_value_t = default_drop_search_after())] #[serde(default = "default_drop_search_after")] pub experimental_drop_search_after: NonZeroUsize, - /// Experimental number of searches per core. For more information, see: + /// Experimental number of searches per core. For more information, + /// see: /// /// Lets you customize how many search requests can run on each core concurrently. /// The default value is 4. @@ -377,16 +392,19 @@ pub struct Opt { #[serde(default = "default_nb_searches_per_core")] pub experimental_nb_searches_per_core: NonZeroUsize, - /// Experimental logs mode feature. For more information, see: + /// Experimental logs mode feature. For more information, + /// see: /// /// Change the mode of the logs on the console. #[clap(long, env = MEILI_EXPERIMENTAL_LOGS_MODE, default_value_t)] #[serde(default)] pub experimental_logs_mode: LogMode, - /// Experimental logs route feature. 
For more information, see: + /// Experimental logs route feature. For more information, + /// see: /// - /// Enables the log routes on the `POST /logs/stream`, `POST /logs/stderr` endpoints, and the `DELETE /logs/stream` to stop receiving logs. + /// Enables the log routes on the `POST /logs/stream`, `POST /logs/stderr` endpoints, + /// and the `DELETE /logs/stream` to stop receiving logs. #[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)] #[serde(default)] pub experimental_enable_logs_route: bool, @@ -396,21 +414,30 @@ pub struct Opt { /// /// - /!\ Disable the automatic clean up of old processed tasks, you're in charge of that now /// - Lets you specify a custom task ID upon registering a task - /// - Lets you execute dry-register a task (get an answer from the route but nothing is actually registered in meilisearch and it won't be processed) + /// - Lets you dry-register a task (get an answer from the route but nothing is actually + /// registered in Meilisearch and it won't be processed) #[clap(long, env = MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS)] #[serde(default)] pub experimental_replication_parameters: bool, - /// Experimental RAM reduction during indexing, do not use in production, see: + /// Experimental RAM reduction during indexing, do not use in production, + /// see: #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)] #[serde(default)] pub experimental_reduce_indexing_memory_usage: bool, - /// Experimentally reduces the maximum number of tasks that will be processed at once, see: + /// Experimentally reduces the maximum number of tasks that will be processed at once, + /// see: #[clap(long, env = MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS, default_value_t = default_limit_batched_tasks())] #[serde(default = "default_limit_batched_tasks")] pub experimental_max_number_of_batched_tasks: usize, + /// Experimentally reduces the maximum total size, in bytes, of tasks that will be processed at once, + /// see: +
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, default_value_t = default_limit_batched_tasks_total_size())] + #[serde(default = "default_limit_batched_tasks_total_size")] + pub experimental_limit_batched_tasks_total_size: u64, + #[serde(flatten)] #[clap(flatten)] pub indexer_options: IndexerOpts, @@ -482,7 +509,6 @@ impl Opt { max_index_size: _, max_task_db_size: _, http_payload_size_limit, - experimental_max_number_of_batched_tasks, ssl_cert_path, ssl_key_path, ssl_auth_path, @@ -512,6 +538,8 @@ impl Opt { experimental_enable_logs_route, experimental_replication_parameters, experimental_reduce_indexing_memory_usage, + experimental_max_number_of_batched_tasks, + experimental_limit_batched_tasks_total_size, } = self; export_to_env_if_not_present(MEILI_DB_PATH, db_path); export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); @@ -534,10 +562,6 @@ impl Opt { MEILI_HTTP_PAYLOAD_SIZE_LIMIT, http_payload_size_limit.to_string(), ); - export_to_env_if_not_present( - MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS, - experimental_max_number_of_batched_tasks.to_string(), - ); if let Some(ssl_cert_path) = ssl_cert_path { export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path); } @@ -596,6 +620,14 @@ impl Opt { MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE, experimental_reduce_indexing_memory_usage.to_string(), ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS, + experimental_max_number_of_batched_tasks.to_string(), + ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, + experimental_limit_batched_tasks_total_size.to_string(), + ); indexer_options.export_to_env(); } @@ -760,8 +792,8 @@ impl MaxMemory { /// Returns the total amount of bytes available or `None` if this system isn't supported. 
fn total_memory_bytes() -> Option { if sysinfo::IS_SUPPORTED_SYSTEM { - let memory_kind = RefreshKind::new().with_memory(MemoryRefreshKind::new().with_ram()); - let mut system = System::new_with_specifics(memory_kind); + let mem_kind = RefreshKind::nothing().with_memory(MemoryRefreshKind::nothing().with_ram()); + let mut system = System::new_with_specifics(mem_kind); system.refresh_memory(); Some(system.total_memory()) } else { @@ -899,6 +931,10 @@ fn default_limit_batched_tasks() -> usize { usize::MAX } +fn default_limit_batched_tasks_total_size() -> u64 { + u64::MAX +} + fn default_snapshot_dir() -> PathBuf { PathBuf::from(DEFAULT_SNAPSHOT_DIR) } diff --git a/crates/meilisearch/src/routes/api_key.rs b/crates/meilisearch/src/routes/api_key.rs index 0bd4b9d59..3130006e3 100644 --- a/crates/meilisearch/src/routes/api_key.rs +++ b/crates/meilisearch/src/routes/api_key.rs @@ -13,14 +13,28 @@ use meilisearch_types::error::{Code, ResponseError}; use meilisearch_types::keys::{CreateApiKey, Key, PatchApiKey}; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; +use utoipa::{IntoParams, OpenApi, ToSchema}; use uuid::Uuid; -use super::PAGINATION_DEFAULT_LIMIT; +use super::{PaginationView, PAGINATION_DEFAULT_LIMIT, PAGINATION_DEFAULT_LIMIT_FN}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::Pagination; +#[derive(OpenApi)] +#[openapi( + paths(create_api_key, list_api_keys, get_api_key, patch_api_key, delete_api_key), + tags(( + name = "Keys", + description = "Manage API `keys` for a Meilisearch instance. Each key has a given set of permissions. +You must have the master key or the default admin key to access the keys route. More information about the keys and their rights. 
+Accessing any route under `/keys` without having set a master key will result in an error.", + external_docs(url = "https://www.meilisearch.com/docs/reference/api/keys"), + )), +)] +pub struct ApiKeyApi; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") @@ -35,6 +49,51 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } +/// Create an API Key +/// +/// Create an API Key. +#[utoipa::path( + post, + path = "", + tag = "Keys", + security(("Bearer" = ["keys.create", "keys.*", "*"])), + request_body = CreateApiKey, + responses( + (status = 202, description = "Key has been created", body = KeyView, content_type = "application/json", example = json!( + { + "uid": "01b4bc42-eb33-4041-b481-254d00cce834", + "key": "d0552b41536279a0ad88bd595327b96f01176a60c2243e906c52ac02375f9bc4", + "name": "Indexing Products API key", + "description": null, + "actions": [ + "documents.add" + ], + "indexes": [ + "products" + ], + "expiresAt": "2021-11-13T00:00:00Z", + "createdAt": "2021-11-12T10:00:00Z", + "updatedAt": "2021-11-12T10:00:00Z" + } + )), + (status = 401, description = "The route has been hit on an unprotected instance", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Meilisearch is running without a master key. To access this API endpoint, you must have set a master key at launch.", + "code": "missing_master_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_master_key" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn create_api_key( auth_controller: GuardedData, Data>, body: AwebJson, @@ -51,12 +110,15 @@ pub async fn create_api_key( Ok(HttpResponse::Created().json(res)) } -#[derive(Deserr, Debug, Clone, Copy)] +#[derive(Deserr, Debug, Clone, Copy, IntoParams)] #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)] +#[into_params(rename_all = "camelCase", parameter_in = Query)] pub struct ListApiKeys { #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = usize, default = 0)] pub offset: Param, #[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError)] + #[param(value_type = usize, default = PAGINATION_DEFAULT_LIMIT_FN)] pub limit: Param, } @@ -66,6 +128,58 @@ impl ListApiKeys { } } +/// Get API Keys +/// +/// List all API Keys +#[utoipa::path( + get, + path = "", + tag = "Keys", + security(("Bearer" = ["keys.get", "keys.*", "*"])), + params(ListApiKeys), + responses( + (status = 202, description = "List of keys", body = PaginationView, content_type = "application/json", example = json!( + { + "results": [ + { + "uid": "01b4bc42-eb33-4041-b481-254d00cce834", + "key": "d0552b41536279a0ad88bd595327b96f01176a60c2243e906c52ac02375f9bc4", + "name": "An API Key", + "description": null, + "actions": [ + "documents.add" + ], + "indexes": [ + "movies" + ], + "expiresAt": "2022-11-12T10:00:00Z", + "createdAt": "2021-11-12T10:00:00Z", + "updatedAt": "2021-11-12T10:00:00Z" + } + ], + "limit": 20, + "offset": 0, + "total": 1 + } + )), + (status = 401, description = "The route has been hit on an unprotected instance", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Meilisearch is running without a master key. 
To access this API endpoint, you must have set a master key at launch.", + "code": "missing_master_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_master_key" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn list_api_keys( auth_controller: GuardedData, Data>, list_api_keys: AwebQueryParameter, @@ -84,6 +198,51 @@ pub async fn list_api_keys( Ok(HttpResponse::Ok().json(page_view)) } +/// Get an API Key +/// +/// Get an API key from its `uid` or its `key` field. +#[utoipa::path( + get, + path = "/{uidOrKey}", + tag = "Keys", + security(("Bearer" = ["keys.get", "keys.*", "*"])), + params(("uidOrKey" = String, Path, format = Password, example = "7b198a7f-52a0-4188-8762-9ad93cd608b2", description = "The `uid` or `key` field of an existing API key", nullable = false)), + responses( + (status = 200, description = "The key is returned", body = KeyView, content_type = "application/json", example = json!( + { + "uid": "01b4bc42-eb33-4041-b481-254d00cce834", + "key": "d0552b41536279a0ad88bd595327b96f01176a60c2243e906c52ac02375f9bc4", + "name": "An API Key", + "description": null, + "actions": [ + "documents.add" + ], + "indexes": [ + "movies" + ], + "expiresAt": "2022-11-12T10:00:00Z", + "createdAt": "2021-11-12T10:00:00Z", + "updatedAt": "2021-11-12T10:00:00Z" + } + )), + (status = 401, description = "The route has been hit on an unprotected instance", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Meilisearch is running without a master key. 
To access this API endpoint, you must have set a master key at launch.", + "code": "missing_master_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_master_key" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn get_api_key( auth_controller: GuardedData, Data>, path: web::Path, @@ -103,6 +262,53 @@ pub async fn get_api_key( Ok(HttpResponse::Ok().json(res)) } +/// Update a Key +/// +/// Update the name and description of an API key. +/// Updates to keys are partial. This means you should provide only the fields you intend to update, as any fields not present in the payload will remain unchanged. 
+#[utoipa::path( + patch, + path = "/{uidOrKey}", + tag = "Keys", + security(("Bearer" = ["keys.update", "keys.*", "*"])), + params(("uidOrKey" = String, Path, format = Password, example = "7b198a7f-52a0-4188-8762-9ad93cd608b2", description = "The `uid` or `key` field of an existing API key", nullable = false)), + request_body = PatchApiKey, + responses( + (status = 200, description = "The key have been updated", body = KeyView, content_type = "application/json", example = json!( + { + "uid": "01b4bc42-eb33-4041-b481-254d00cce834", + "key": "d0552b41536279a0ad88bd595327b96f01176a60c2243e906c52ac02375f9bc4", + "name": "An API Key", + "description": null, + "actions": [ + "documents.add" + ], + "indexes": [ + "movies" + ], + "expiresAt": "2022-11-12T10:00:00Z", + "createdAt": "2021-11-12T10:00:00Z", + "updatedAt": "2021-11-12T10:00:00Z" + } + )), + (status = 401, description = "The route has been hit on an unprotected instance", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Meilisearch is running without a master key. To access this API endpoint, you must have set a master key at launch.", + "code": "missing_master_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_master_key" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn patch_api_key( auth_controller: GuardedData, Data>, body: AwebJson, @@ -123,6 +329,35 @@ pub async fn patch_api_key( Ok(HttpResponse::Ok().json(res)) } +/// Delete a key +/// +/// Delete the specified API key. 
+#[utoipa::path( + delete, + path = "/{uidOrKey}", + tag = "Keys", + security(("Bearer" = ["keys.delete", "keys.*", "*"])), + params(("uidOrKey" = String, Path, format = Password, example = "7b198a7f-52a0-4188-8762-9ad93cd608b2", description = "The `uid` or `key` field of an existing API key", nullable = false)), + responses( + (status = NO_CONTENT, description = "The key have been removed"), + (status = 401, description = "The route has been hit on an unprotected instance", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Meilisearch is running without a master key. To access this API endpoint, you must have set a master key at launch.", + "code": "missing_master_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_master_key" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn delete_api_key( auth_controller: GuardedData, Data>, path: web::Path, @@ -144,19 +379,30 @@ pub struct AuthParam { key: String, } -#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, ToSchema)] #[serde(rename_all = "camelCase")] -struct KeyView { +pub(super) struct KeyView { + /// The name of the API Key if any name: Option, + /// The description of the API Key if any description: Option, + /// The actual API Key you can send to Meilisearch key: String, + /// The `Uuid` specified while creating the key or autogenerated by Meilisearch. uid: Uuid, + /// The actions accessible with this key. actions: Vec, + /// The indexes accessible with this key. indexes: Vec, + /// The expiration date of the key. 
Once this timestamp is exceeded the key is not deleted but cannot be used anymore. #[serde(serialize_with = "time::serde::rfc3339::option::serialize")] expires_at: Option, + /// The date of creation of this API Key. + #[schema(read_only)] #[serde(serialize_with = "time::serde::rfc3339::serialize")] created_at: OffsetDateTime, + /// The date of the last update made on this key. + #[schema(read_only)] #[serde(serialize_with = "time::serde::rfc3339::serialize")] updated_at: OffsetDateTime, } diff --git a/crates/meilisearch/src/routes/batches.rs b/crates/meilisearch/src/routes/batches.rs index 4d42cdd16..8ca9f1537 100644 --- a/crates/meilisearch/src/routes/batches.rs +++ b/crates/meilisearch/src/routes/batches.rs @@ -8,17 +8,76 @@ use meilisearch_types::deserr::DeserrQueryParamError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; use serde::Serialize; +use utoipa::{OpenApi, ToSchema}; use super::tasks::TasksFilterQuery; use super::ActionPolicy; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; +#[derive(OpenApi)] +#[openapi( + paths(get_batch, get_batches), + tags(( + name = "Batches", + description = "The /batches route gives information about the progress of batches of asynchronous operations.", + external_docs(url = "https://www.meilisearch.com/docs/reference/api/batches"), + )), +)] +pub struct BatchesApi; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::get().to(SeqHandler(get_batches)))) .service(web::resource("/{batch_id}").route(web::get().to(SeqHandler(get_batch)))); } +/// Get one batch +/// +/// Get a single batch. 
+#[utoipa::path( + get, + path = "/{batchUid}", + tag = "Batches", + security(("Bearer" = ["tasks.get", "tasks.*", "*"])), + params( + ("batchUid" = String, Path, example = "8685", description = "The unique batch id", nullable = false), + ), + responses( + (status = OK, description = "Return the batch", body = BatchView, content_type = "application/json", example = json!( + { + "uid": 1, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 1 + }, + "progress": null, + "stats": { + "totalNbTasks": 1, + "status": { + "succeeded": 1 + }, + "types": { + "documentAdditionOrUpdate": 1 + }, + "indexUids": { + "INDEX_NAME": 1 + } + }, + "duration": "PT0.364788S", + "startedAt": "2024-12-10T15:48:49.672141Z", + "finishedAt": "2024-12-10T15:48:50.036929Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] async fn get_batch( index_scheduler: GuardedData, Data>, batch_uid: web::Path, @@ -36,17 +95,17 @@ async fn get_batch( let query = index_scheduler::Query { batch_uids: Some(vec![batch_uid]), ..Query::default() }; let filters = index_scheduler.filters(); - let (batches, _) = index_scheduler.get_batches_from_authorized_indexes(query, filters)?; + let (batches, _) = index_scheduler.get_batches_from_authorized_indexes(&query, filters)?; if let Some(batch) = batches.first() { - let task_view = BatchView::from_batch(batch); - Ok(HttpResponse::Ok().json(task_view)) + let batch_view = BatchView::from_batch(batch); + Ok(HttpResponse::Ok().json(batch_view)) } else { Err(index_scheduler::Error::BatchNotFound(batch_uid).into()) } } -#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, ToSchema)] pub struct AllBatches { 
results: Vec, total: u64, @@ -55,6 +114,63 @@ pub struct AllBatches { next: Option, } +/// Get batches +/// +/// List all batches, regardless of index. The batch objects are contained in the results array. +/// Batches are always returned in descending order of uid. This means that by default, the most recently created batch objects appear first. +/// Batch results are paginated and can be filtered with query parameters. +#[utoipa::path( + get, + path = "", + tag = "Batches", + security(("Bearer" = ["tasks.get", "tasks.*", "*"])), + params(TasksFilterQuery), + responses( + (status = OK, description = "Return the batches", body = AllBatches, content_type = "application/json", example = json!( + { + "results": [ + { + "uid": 2, + "details": { + "stopWords": [ + "of", + "the" + ] + }, + "progress": null, + "stats": { + "totalNbTasks": 1, + "status": { + "succeeded": 1 + }, + "types": { + "settingsUpdate": 1 + }, + "indexUids": { + "INDEX_NAME": 1 + } + }, + "duration": "PT0.110083S", + "startedAt": "2024-12-10T15:49:04.995321Z", + "finishedAt": "2024-12-10T15:49:05.105404Z" + } + ], + "total": 3, + "limit": 1, + "from": 2, + "next": 1 + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] async fn get_batches( index_scheduler: GuardedData, Data>, params: AwebQueryParameter, @@ -66,7 +182,7 @@ async fn get_batches( let query = params.into_query(); let filters = index_scheduler.filters(); - let (tasks, total) = index_scheduler.get_batches_from_authorized_indexes(query, filters)?; + let (tasks, total) = index_scheduler.get_batches_from_authorized_indexes(&query, filters)?; let mut results: Vec<_> = tasks.iter().map(BatchView::from_batch).collect(); // If we were able to fetch the number +1 tasks we asked diff --git a/crates/meilisearch/src/routes/dump.rs b/crates/meilisearch/src/routes/dump.rs index c78dc4dad..bc16409a2 100644 --- a/crates/meilisearch/src/routes/dump.rs +++ b/crates/meilisearch/src/routes/dump.rs @@ -5,6 +5,7 @@ use meilisearch_auth::AuthController; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; use tracing::debug; +use utoipa::OpenApi; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; @@ -13,12 +14,60 @@ use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::Opt; +#[derive(OpenApi)] +#[openapi( + paths(create_dump), + tags(( + name = "Dumps", + description = "The `dumps` route allows the creation of database dumps. +Dumps are `.dump` files that can be used to launch Meilisearch. Dumps are compatible between Meilisearch versions. +Creating a dump is also referred to as exporting it, whereas launching Meilisearch with a dump is referred to as importing it. +During a [dump export](https://www.meilisearch.com/docs/reference/api/dump#create-a-dump), all indexes of the current instance are +exported—together with their documents and settings—and saved as a single `.dump` file. 
During a dump import, +all indexes contained in the indicated `.dump` file are imported along with their associated documents and settings. +Any existing index with the same uid as an index in the dump file will be overwritten. +Dump imports are [performed at launch](https://www.meilisearch.com/docs/learn/advanced/dumps#importing-a-dump) using an option.", + external_docs(url = "https://www.meilisearch.com/docs/reference/api/dump"), + )), +)] +pub struct DumpApi; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))); } crate::empty_analytics!(DumpAnalytics, "Dump Created"); +/// Create a dump +/// +/// Triggers a dump creation process. Once the process is complete, a dump is created in the +/// [dump directory](https://www.meilisearch.com/docs/learn/self_hosted/configure_meilisearch_at_launch#dump-directory). +/// If the dump directory does not exist yet, it will be created. +#[utoipa::path( + post, + path = "", + tag = "Dumps", + security(("Bearer" = ["dumps.create", "dumps.*", "*"])), + responses( + (status = 202, description = "Dump is being created", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 0, + "indexUid": null, + "status": "enqueued", + "type": "DumpCreation", + "enqueuedAt": "2021-01-01T09:39:00.000000Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn create_dump( index_scheduler: GuardedData, Data>, auth_controller: GuardedData, Data>, diff --git a/crates/meilisearch/src/routes/features.rs b/crates/meilisearch/src/routes/features.rs index 5d93adc02..fe41ad9d9 100644 --- a/crates/meilisearch/src/routes/features.rs +++ b/crates/meilisearch/src/routes/features.rs @@ -8,12 +8,26 @@ use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; use serde::Serialize; use tracing::debug; +use utoipa::{OpenApi, ToSchema}; use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; +#[derive(OpenApi)] +#[openapi( + paths(get_features, patch_features), + tags(( + name = "Experimental features", + description = "The `/experimental-features` route allows you to activate or deactivate some of Meilisearch's experimental features. + +This route is **synchronous**. This means that no task object will be returned, and any activated or deactivated features will be made available or unavailable immediately.", + external_docs(url = "https://www.meilisearch.com/docs/reference/api/experimental_features"), + )), +)] +pub struct ExperimentalFeaturesApi; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") @@ -22,6 +36,32 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } +/// Get all experimental features +/// +/// Get a list of all experimental features that can be activated via the /experimental-features route and whether or not they are currently activated. 
+#[utoipa::path( + get, + path = "", + tag = "Experimental features", + security(("Bearer" = ["experimental_features.get", "experimental_features.*", "*"])), + responses( + (status = OK, description = "Experimental features are returned", body = RuntimeTogglableFeatures, content_type = "application/json", example = json!(RuntimeTogglableFeatures { + vector_store: Some(true), + metrics: Some(true), + logs_route: Some(false), + edit_documents_by_function: Some(false), + contains_filter: Some(false), + })), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] async fn get_features( index_scheduler: GuardedData< ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_GET }>, @@ -35,8 +75,10 @@ async fn get_features( HttpResponse::Ok().json(features) } -#[derive(Debug, Deserr)] +#[derive(Debug, Deserr, ToSchema, Serialize)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] pub struct RuntimeTogglableFeatures { #[deserr(default)] pub vector_store: Option, @@ -79,6 +121,32 @@ impl Aggregate for PatchExperimentalFeatureAnalytics { } } +/// Configure experimental features +/// +/// Activate or deactivate experimental features. 
+#[utoipa::path( + patch, + path = "", + tag = "Experimental features", + security(("Bearer" = ["experimental_features.update", "experimental_features.*", "*"])), + responses( + (status = OK, description = "Experimental features are returned", body = RuntimeTogglableFeatures, content_type = "application/json", example = json!(RuntimeTogglableFeatures { + vector_store: Some(true), + metrics: Some(true), + logs_route: Some(false), + edit_documents_by_function: Some(false), + contains_filter: Some(false), + })), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] async fn patch_features( index_scheduler: GuardedData< ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_UPDATE }>, diff --git a/crates/meilisearch/src/routes/indexes/documents.rs b/crates/meilisearch/src/routes/indexes/documents.rs index 5f79000bd..3da24859d 100644 --- a/crates/meilisearch/src/routes/indexes/documents.rs +++ b/crates/meilisearch/src/routes/indexes/documents.rs @@ -31,6 +31,7 @@ use tempfile::tempfile; use tokio::fs::File; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; use tracing::debug; +use utoipa::{IntoParams, OpenApi, ToSchema}; use crate::analytics::{Aggregate, AggregateMethod, Analytics}; use crate::error::MeilisearchHttpError; @@ -71,6 +72,19 @@ pub struct DocumentParam { document_id: String, } +#[derive(OpenApi)] +#[openapi( + paths(get_document, get_documents, delete_document, replace_documents, update_documents, clear_all_documents, delete_documents_batch, delete_documents_by_filter, edit_documents_by_function, documents_by_query_post), + tags( + ( + name = "Documents", + description = "Documents are objects composed of fields that 
can store any type of data. Each field contains an attribute and its associated value. Documents are stored inside [indexes](https://www.meilisearch.com/docs/learn/getting_started/indexes).", + external_docs(url = "https://www.meilisearch.com/docs/learn/getting_started/documents"), + ), + ), +)] +pub struct DocumentsApi; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") @@ -93,12 +107,18 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } -#[derive(Debug, Deserr)] +#[derive(Debug, Deserr, IntoParams, ToSchema)] #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)] +#[into_params(rename_all = "camelCase", parameter_in = Query)] +#[schema(rename_all = "camelCase")] pub struct GetDocument { #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = Option>)] + #[schema(value_type = Option>)] fields: OptionStarOrList, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = Option)] + #[schema(value_type = Option)] retrieve_vectors: Param, } @@ -174,6 +194,55 @@ impl Aggregate for DocumentsFetchAggregator { } } +/// Get one document +/// +/// Get one document from its primary key. 
+#[utoipa::path( + get, + path = "{indexUid}/documents/{documentId}", + tag = "Documents", + security(("Bearer" = ["documents.get", "documents.*", "*"])), + params( + ("indexUid" = String, Path, example = "movies", description = "Index Unique Identifier", nullable = false), + ("documentId" = String, Path, example = "85087", description = "The document identifier", nullable = false), + GetDocument, + ), + responses( + (status = 200, description = "The document is returned", body = serde_json::Value, content_type = "application/json", example = json!( + { + "id": 25684, + "title": "American Ninja 5", + "poster": "https://image.tmdb.org/t/p/w1280/iuAQVI4mvjI83wnirpD8GVNRVuY.jpg", + "overview": "When a scientists daughter is kidnapped, American Ninja, attempts to find her, but this time he teams up with a youngster he has trained in the ways of the ninja.", + "release_date": 725846400 + } + )), + (status = 404, description = "Index not found", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Index `movies` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + } + )), + (status = 404, description = "Document not found", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Document `a` not found.", + "code": "document_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#document_not_found" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn get_document( index_scheduler: GuardedData, Data>, document_param: web::Path, @@ -237,6 +306,38 @@ impl Aggregate for DocumentsDeletionAggregator { } } +/// Delete a document +/// +/// Delete a single document by id. +#[utoipa::path( + delete, + path = "{indexUid}/documents/{documentId}", + tag = "Documents", + security(("Bearer" = ["documents.delete", "documents.*", "*"])), + params( + ("indexUid" = String, Path, example = "movies", description = "Index Unique Identifier", nullable = false), + ("documentId" = String, Path, example = "853", description = "Document Identifier", nullable = false), + ), + responses( + (status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": null, + "status": "enqueued", + "type": "documentAdditionOrUpdate", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn delete_document( index_scheduler: GuardedData, Data>, path: web::Path, @@ -271,36 +372,98 @@ pub async fn delete_document( Ok(HttpResponse::Accepted().json(task)) } -#[derive(Debug, Deserr)] +#[derive(Debug, Deserr, IntoParams)] #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)] +#[into_params(rename_all = "camelCase", parameter_in = Query)] pub struct BrowseQueryGet { + #[param(default, value_type = Option)] #[deserr(default, error = DeserrQueryParamError)] offset: Param, + #[param(default, value_type = Option)] #[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError)] limit: Param, + #[param(default, value_type = Option>)] #[deserr(default, error = DeserrQueryParamError)] fields: OptionStarOrList, + #[param(default, value_type = Option)] #[deserr(default, error = DeserrQueryParamError)] retrieve_vectors: Param, + #[param(default, value_type = Option, example = "popularity > 1000")] #[deserr(default, error = DeserrQueryParamError)] filter: Option, } -#[derive(Debug, Deserr)] +#[derive(Debug, Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[schema(rename_all = "camelCase")] pub struct BrowseQuery { + #[schema(default, example = 150)] #[deserr(default, error = DeserrJsonError)] offset: usize, + #[schema(default = 20, example = 1)] #[deserr(default = PAGINATION_DEFAULT_LIMIT, error = DeserrJsonError)] limit: usize, + #[schema(example = json!(["title, description"]))] #[deserr(default, error = DeserrJsonError)] fields: Option>, + #[schema(default, example = true)] #[deserr(default, error = DeserrJsonError)] retrieve_vectors: bool, + #[schema(default, value_type = Option, example = "popularity > 1000")] #[deserr(default, error = DeserrJsonError)] 
filter: Option, } +/// Get documents with POST +/// +/// Get a set of documents. +#[utoipa::path( + post, + path = "{indexUid}/documents/fetch", + tag = "Documents", + security(("Bearer" = ["documents.delete", "documents.*", "*"])), + params(("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false)), + request_body = BrowseQuery, + responses( + (status = 200, description = "Task successfully enqueued", body = PaginationView, content_type = "application/json", example = json!( + { + "results":[ + { + "title":"The Travels of Ibn Battuta", + "genres":[ + "Travel", + "Adventure" + ], + "language":"English", + "rating":4.5 + }, + { + "title":"Pride and Prejudice", + "genres":[ + "Classics", + "Fiction", + "Romance", + "Literature" + ], + "language":"English", + "rating":4 + }, + ], + "offset":0, + "limit":2, + "total":5 + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn documents_by_query_post( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -326,6 +489,60 @@ pub async fn documents_by_query_post( documents_by_query(&index_scheduler, index_uid, body) } +/// Get documents +/// +/// Get documents by batches. 
+#[utoipa::path( + get, + path = "{indexUid}/documents", + tag = "Documents", + security(("Bearer" = ["documents.get", "documents.*", "*"])), + params( + ("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false), + BrowseQueryGet + ), + responses( + (status = 200, description = "The documents are returned", body = PaginationView, content_type = "application/json", example = json!( + { + "results": [ + { + "id": 25684, + "title": "American Ninja 5", + "poster": "https://image.tmdb.org/t/p/w1280/iuAQVI4mvjI83wnirpD8GVNRVuY.jpg", + "overview": "When a scientists daughter is kidnapped, American Ninja, attempts to find her, but this time he teams up with a youngster he has trained in the ways of the ninja.", + "release_date": 725846400 + }, + { + "id": 45881, + "title": "The Bridge of San Luis Rey", + "poster": "https://image.tmdb.org/t/p/w500/4X7quIcdkc24Cveg5XdpfRqxtYA.jpg", + "overview": "The Bridge of San Luis Rey is American author Thornton Wilder's second novel, first published in 1927 to worldwide acclaim. It tells the story of several interrelated people who die in the collapse of an Inca rope-fiber suspension bridge in Peru, and the events that lead up to their being on the bridge.[ A friar who has witnessed the tragic accident then goes about inquiring into the lives of the victims, seeking some sort of cosmic answer to the question of why each had to die. 
The novel won the Pulitzer Prize in 1928.", + "release_date": 1072915200 + } + ], + "limit": 20, + "offset": 0, + "total": 2 + } + )), + (status = 404, description = "Index not found", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Index `movies` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn get_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -396,11 +613,17 @@ fn documents_by_query( Ok(HttpResponse::Ok().json(ret)) } -#[derive(Deserialize, Debug, Deserr)] +#[derive(Deserialize, Debug, Deserr, IntoParams)] #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)] +#[into_params(parameter_in = Query, rename_all = "camelCase")] pub struct UpdateDocumentsQuery { + /// The primary key of the documents. primaryKey is optional. If you want to set the primary key of your index through this route, + /// it only has to be done the first time you add documents to the index. After which it will be ignored if given. + #[param(example = "id")] #[deserr(default, error = DeserrQueryParamError)] pub primary_key: Option, + /// Customize the csv delimiter when importing CSV documents. 
+ #[param(value_type = char, default = ",", example = ";")] #[deserr(default, try_from(char) = from_char_csv_delimiter -> DeserrQueryParamError, error = DeserrQueryParamError)] pub csv_delimiter: Option, } @@ -451,6 +674,52 @@ impl Aggregate for DocumentsAggregator { } } +/// Add or replace documents +/// +/// Add a list of documents or replace them if they already exist. +/// +/// If you send an already existing document (same id) the whole existing document will be overwritten by the new document. Fields previously in the document not present in the new document are removed. +/// +/// For a partial update of the document see Add or update documents route. +/// > info +/// > If the provided index does not exist, it will be created. +/// > info +/// > Use the reserved `_geo` object to add geo coordinates to a document. `_geo` is an object made of `lat` and `lng` fields. +/// > +/// > When the vectorStore feature is enabled you can use the reserved `_vectors` field in your documents. +/// > It can accept an array of floats, multiple arrays of floats in an outer array or an object. +/// > This object accepts keys corresponding to the different embedders defined in your index settings.
+#[utoipa::path( + post, + path = "{indexUid}/documents", + tag = "Documents", + security(("Bearer" = ["documents.add", "documents.*", "*"])), + params( + ("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false), + // Here we can use the post version of the browse query since it contains the exact same parameter + UpdateDocumentsQuery, + ), + request_body = serde_json::Value, + responses( + (status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": null, + "status": "enqueued", + "type": "documentAdditionOrUpdate", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn replace_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -508,6 +777,50 @@ pub async fn replace_documents( Ok(HttpResponse::Accepted().json(task)) } +/// Add or update documents +/// +/// Add a list of documents or update them if they already exist. +/// If you send an already existing document (same id) the old document will be only partially updated according to the fields of the new document. Thus, any fields not present in the new document are kept and remained unchanged. +/// To completely overwrite a document, see Add or replace documents route. +/// > info +/// > If the provided index does not exist, it will be created. +/// > info +/// > Use the reserved `_geo` object to add geo coordinates to a document. `_geo` is an object made of `lat` and `lng` field. 
+/// > +/// > When the vectorStore feature is enabled you can use the reserved `_vectors` field in your documents. +/// > It can accept an array of floats, multiple arrays of floats in an outer array or an object. +/// > This object accepts keys corresponding to the different embedders defined your index settings. +#[utoipa::path( + put, + path = "{indexUid}/documents", + tag = "Documents", + security(("Bearer" = ["documents.add", "documents.*", "*"])), + params( + ("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false), + // Here we can use the post version of the browse query since it contains the exact same parameter + UpdateDocumentsQuery, + ), + request_body = serde_json::Value, + responses( + (status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": null, + "status": "enqueued", + "type": "documentAdditionOrUpdate", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn update_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -608,7 +921,7 @@ async fn document_addition( } }; - let (uuid, mut update_file) = index_scheduler.create_update_file(dry_run)?; + let (uuid, mut update_file) = index_scheduler.queue.create_update_file(dry_run)?; let documents_count = match format { PayloadType::Ndjson => { let (path, file) = update_file.into_parts(); @@ -670,7 +983,7 @@ async fn document_addition( Err(e) => { // Here the file MAY have been persisted or not. 
// We don't know thus we ignore the file not found error. - match index_scheduler.delete_update_file(uuid) { + match index_scheduler.queue.delete_update_file(uuid) { Ok(()) => (), Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e))) if e.kind() == ErrorKind::NotFound => {} @@ -701,7 +1014,7 @@ async fn document_addition( { Ok(task) => task, Err(e) => { - index_scheduler.delete_update_file(uuid)?; + index_scheduler.queue.delete_update_file(uuid)?; return Err(e.into()); } }; @@ -742,6 +1055,38 @@ async fn copy_body_to_file( Ok(read_file) } +/// Delete documents by batch +/// +/// Delete a set of documents based on an array of document ids. +#[utoipa::path( + post, + path = "{indexUid}/delete-batch", + tag = "Documents", + security(("Bearer" = ["documents.delete", "documents.*", "*"])), + params( + ("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false), + ), + request_body = Vec, + responses( + (status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": null, + "status": "enqueued", + "type": "documentAdditionOrUpdate", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn delete_documents_batch( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -781,13 +1126,44 @@ pub async fn delete_documents_batch( Ok(HttpResponse::Accepted().json(task)) } -#[derive(Debug, Deserr)] +#[derive(Debug, Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[schema(rename_all = "camelCase")] pub struct DocumentDeletionByFilter { #[deserr(error = DeserrJsonError, missing_field_error = DeserrJsonError::missing_document_filter)] filter: Value, } +/// Delete documents by filter +/// +/// Delete a set of documents based on a filter. +#[utoipa::path( + post, + path = "{indexUid}/documents/delete", + tag = "Documents", + security(("Bearer" = ["documents.delete", "documents.*", "*"])), + params(("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false)), + request_body = DocumentDeletionByFilter, + responses( + (status = ACCEPTED, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": null, + "status": "enqueued", + "type": "documentDeletion", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn delete_documents_by_filter( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -828,13 +1204,16 @@ pub async fn delete_documents_by_filter( Ok(HttpResponse::Accepted().json(task)) } -#[derive(Debug, Deserr)] +#[derive(Debug, Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct DocumentEditionByFunction { + /// A string containing a filter expression. #[deserr(default, error = DeserrJsonError)] pub filter: Option, + /// An object with data Meilisearch should make available for the editing function. #[deserr(default, error = DeserrJsonError)] pub context: Option, + /// A string containing a RHAI function. #[deserr(error = DeserrJsonError, missing_field_error = DeserrJsonError::missing_document_edition_function)] pub function: String, } @@ -867,6 +1246,38 @@ impl Aggregate for EditDocumentsByFunctionAggregator { } } +/// Edit documents by function. +/// +/// Use a [RHAI function](https://rhai.rs/book/engine/hello-world.html) to edit one or more documents directly in Meilisearch.
+#[utoipa::path( + post, + path = "{indexUid}/documents/edit", + tag = "Documents", + security(("Bearer" = ["documents.*", "*"])), + params( + ("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false), + ), + request_body = DocumentEditionByFunction, + responses( + (status = 202, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": null, + "status": "enqueued", + "type": "documentDeletion", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn edit_documents_by_function( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -936,6 +1347,35 @@ pub async fn edit_documents_by_function( Ok(HttpResponse::Accepted().json(task)) } +/// Delete all documents +/// +/// Delete all documents in the specified index. 
+#[utoipa::path( + delete, + path = "{indexUid}/documents", + tag = "Documents", + security(("Bearer" = ["documents.delete", "documents.*", "*"])), + params(("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false)), + responses( + (status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": null, + "status": "enqueued", + "type": "documentDeletion", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn clear_all_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, diff --git a/crates/meilisearch/src/routes/indexes/facet_search.rs b/crates/meilisearch/src/routes/indexes/facet_search.rs index ff11f1305..7a41f1f81 100644 --- a/crates/meilisearch/src/routes/indexes/facet_search.rs +++ b/crates/meilisearch/src/routes/indexes/facet_search.rs @@ -11,6 +11,7 @@ use meilisearch_types::index_uid::IndexUid; use meilisearch_types::locales::Locale; use serde_json::Value; use tracing::debug; +use utoipa::{OpenApi, ToSchema}; use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::*; @@ -18,20 +19,33 @@ use crate::extractors::authentication::GuardedData; use crate::routes::indexes::search::search_kind; use crate::search::{ add_search_rules, perform_facet_search, FacetSearchResult, HybridQuery, MatchingStrategy, - RankingScoreThreshold, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + RankingScoreThreshold, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, 
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, }; use crate::search_queue::SearchQueue; +#[derive(OpenApi)] +#[openapi( + paths(search), + tags( + ( + name = "Facet Search", + description = "The `/facet-search` route allows you to search for facet values. Facet search supports prefix search and typo tolerance. The returned hits are sorted lexicographically in ascending order. You can configure how facets are sorted using the sortFacetValuesBy property of the faceting index settings.", + external_docs(url = "https://www.meilisearch.com/docs/reference/api/facet_search"), + ), + ), +)] +pub struct FacetSearchApi; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(search))); } -/// # Important -/// -/// Intentionally don't use `deny_unknown_fields` to ignore search parameters sent by user -#[derive(Debug, Clone, Default, PartialEq, deserr::Deserr)] +// # Important +// +// Intentionally don't use `deny_unknown_fields` to ignore search parameters sent by user +#[derive(Debug, Clone, Default, PartialEq, deserr::Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase)] pub struct FacetSearchQuery { #[deserr(default, error = DeserrJsonError)] @@ -158,6 +172,60 @@ impl Aggregate for FacetSearchAggregator { } } +/// Perform a facet search +/// +/// Search for a facet value within a given facet. 
+#[utoipa::path( + post, + path = "{indexUid}/facet-search", + tag = "Facet Search", + security(("Bearer" = ["search", "*"])), + params(("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false)), + request_body = FacetSearchQuery, + responses( + (status = 200, description = "The documents are returned", body = SearchResult, content_type = "application/json", example = json!( + { + "hits": [ + { + "id": 2770, + "title": "American Pie 2", + "poster": "https://image.tmdb.org/t/p/w1280/q4LNgUnRfltxzp3gf1MAGiK5LhV.jpg", + "overview": "The whole gang are back and as close as ever. They decide to get even closer by spending the summer together at a beach house. They decide to hold the biggest…", + "release_date": 997405200 + }, + { + "id": 190859, + "title": "American Sniper", + "poster": "https://image.tmdb.org/t/p/w1280/svPHnYE7N5NAGO49dBmRhq0vDQ3.jpg", + "overview": "U.S. Navy SEAL Chris Kyle takes his sole mission—protect his comrades—to heart and becomes one of the most lethal snipers in American history. His pinpoint accuracy not only saves countless lives but also makes him a prime…", + "release_date": 1418256000 + } + ], + "offset": 0, + "limit": 2, + "estimatedTotalHits": 976, + "processingTimeMs": 35, + "query": "american " + } + )), + (status = 404, description = "Index not found", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Index `movies` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn search( index_scheduler: GuardedData, Data>, search_queue: Data, diff --git a/crates/meilisearch/src/routes/indexes/mod.rs b/crates/meilisearch/src/routes/indexes/mod.rs index 26a6569e7..a03d5f691 100644 --- a/crates/meilisearch/src/routes/indexes/mod.rs +++ b/crates/meilisearch/src/routes/indexes/mod.rs @@ -16,8 +16,11 @@ use meilisearch_types::tasks::KindWithContent; use serde::Serialize; use time::OffsetDateTime; use tracing::debug; +use utoipa::{IntoParams, OpenApi, ToSchema}; -use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; +use super::{ + get_task_id, Pagination, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT, +}; use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; @@ -36,6 +39,25 @@ mod settings_analytics; pub mod similar; mod similar_analytics; +#[derive(OpenApi)] +#[openapi( + nest( + (path = "/", api = documents::DocumentsApi), + (path = "/", api = facet_search::FacetSearchApi), + (path = "/", api = similar::SimilarApi), + (path = "/", api = settings::SettingsApi), + ), + paths(list_indexes, create_index, get_index, update_index, delete_index, get_index_stats), + tags( + ( + name = "Indexes", + description = "An index is an entity that gathers a set of [documents](https://www.meilisearch.com/docs/learn/getting_started/documents) with its own [settings](https://www.meilisearch.com/docs/reference/api/settings). 
Learn more about indexes.", + external_docs(url = "https://www.meilisearch.com/docs/reference/api/indexes"), + ), + ), +)] +pub struct IndexesApi; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") @@ -59,14 +81,18 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } -#[derive(Debug, Serialize, Clone)] +#[derive(Debug, Serialize, Clone, ToSchema)] #[serde(rename_all = "camelCase")] pub struct IndexView { + /// Unique identifier for the index pub uid: String, + /// An `RFC 3339` format for date/time/duration. #[serde(with = "time::serde::rfc3339")] pub created_at: OffsetDateTime, + /// An `RFC 3339` format for date/time/duration. #[serde(with = "time::serde::rfc3339")] pub updated_at: OffsetDateTime, + /// Custom primaryKey for documents pub primary_key: Option, } @@ -84,20 +110,61 @@ impl IndexView { } } -#[derive(Deserr, Debug, Clone, Copy)] +#[derive(Deserr, Debug, Clone, Copy, IntoParams)] #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)] +#[into_params(rename_all = "camelCase", parameter_in = Query)] pub struct ListIndexes { + /// The number of indexes to skip before starting to retrieve anything + #[param(value_type = Option, default, example = 100)] #[deserr(default, error = DeserrQueryParamError)] pub offset: Param, + /// The number of indexes to retrieve + #[param(value_type = Option, default = 20, example = 1)] #[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError)] pub limit: Param, } + impl ListIndexes { fn as_pagination(self) -> Pagination { Pagination { offset: self.offset.0, limit: self.limit.0 } } } +/// List indexes +/// +/// List all indexes. 
+#[utoipa::path( + get, + path = "", + tag = "Indexes", + security(("Bearer" = ["indexes.get", "indexes.*", "*"])), + params(ListIndexes), + responses( + (status = 200, description = "Indexes are returned", body = PaginationView, content_type = "application/json", example = json!( + { + "results": [ + { + "uid": "movies", + "primaryKey": "movie_id", + "createdAt": "2019-11-20T09:40:33.711324Z", + "updatedAt": "2019-11-20T09:40:33.711324Z" + } + ], + "limit": 1, + "offset": 0, + "total": 1 + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn list_indexes( index_scheduler: GuardedData, Data>, paginate: AwebQueryParameter, @@ -121,11 +188,16 @@ pub async fn list_indexes( Ok(HttpResponse::Ok().json(ret)) } -#[derive(Deserr, Debug)] +#[derive(Deserr, Debug, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[schema(rename_all = "camelCase")] pub struct IndexCreateRequest { + /// The name of the index + #[schema(example = "movies")] #[deserr(error = DeserrJsonError, missing_field_error = DeserrJsonError::missing_index_uid)] uid: IndexUid, + /// The primary key of the index + #[schema(example = "id")] #[deserr(default, error = DeserrJsonError)] primary_key: Option, } @@ -149,6 +221,35 @@ impl Aggregate for IndexCreatedAggregate { } } +/// Create index +/// +/// Create an index. 
+#[utoipa::path( + post, + path = "", + tag = "Indexes", + security(("Bearer" = ["indexes.create", "indexes.*", "*"])), + request_body = IndexCreateRequest, + responses( + (status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": "movies", + "status": "enqueued", + "type": "indexCreation", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn create_index( index_scheduler: GuardedData, Data>, body: AwebJson, @@ -198,13 +299,42 @@ fn deny_immutable_fields_index( } } -#[derive(Deserr, Debug)] -#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields = deny_immutable_fields_index)] -pub struct UpdateIndexRequest { - #[deserr(default, error = DeserrJsonError)] - primary_key: Option, -} - +/// Get index +/// +/// Get information about an index. 
+#[utoipa::path( + get, + path = "/{indexUid}", + tag = "Indexes", + security(("Bearer" = ["indexes.get", "indexes.*", "*"])), + params(("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false)), + responses( + (status = 200, description = "The index is returned", body = IndexView, content_type = "application/json", example = json!( + { + "uid": "movies", + "primaryKey": "movie_id", + "createdAt": "2019-11-20T09:40:33.711324Z", + "updatedAt": "2019-11-20T09:40:33.711324Z" + } + )), + (status = 404, description = "Index not found", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Index `movies` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn get_index( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -237,6 +367,47 @@ impl Aggregate for IndexUpdatedAggregate { serde_json::to_value(*self).unwrap_or_default() } } + +#[derive(Deserr, Debug, ToSchema)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields = deny_immutable_fields_index)] +#[schema(rename_all = "camelCase")] +pub struct UpdateIndexRequest { + /// The new primary key of the index + #[deserr(default, error = DeserrJsonError)] + primary_key: Option, +} + +/// Update index +/// +/// Update the `primaryKey` of an index. +/// Return an error if the index doesn't exist yet or if it contains documents.
+#[utoipa::path( + patch, + path = "/{indexUid}", + tag = "Indexes", + security(("Bearer" = ["indexes.update", "indexes.*", "*"])), + params(("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false)), + request_body = UpdateIndexRequest, + responses( + (status = ACCEPTED, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 0, + "indexUid": "movies", + "status": "enqueued", + "type": "indexUpdate", + "enqueuedAt": "2021-01-01T09:39:00.000000Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn update_index( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -269,6 +440,35 @@ pub async fn update_index( Ok(HttpResponse::Accepted().json(task)) } +/// Delete index +/// +/// Delete an index. +#[utoipa::path( + delete, + path = "/{indexUid}", + tag = "Indexes", + security(("Bearer" = ["indexes.delete", "indexes.*", "*"])), + params(("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false)), + responses( + (status = ACCEPTED, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 0, + "indexUid": "movies", + "status": "enqueued", + "type": "indexDeletion", + "enqueuedAt": "2021-01-01T09:39:00.000000Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn delete_index( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -289,14 +489,15 @@ pub async fn delete_index( } /// Stats of an `Index`, as known to the `stats` route. -#[derive(Serialize, Debug)] +#[derive(Serialize, Debug, ToSchema)] #[serde(rename_all = "camelCase")] pub struct IndexStats { /// Number of documents in the index pub number_of_documents: u64, - /// Whether the index is currently performing indexation, according to the scheduler. + /// Whether or not the index is currently ingesting documents pub is_indexing: bool, /// Association of every field name with the number of times it occurs in the documents. + #[schema(value_type = HashMap)] pub field_distribution: FieldDistribution, } @@ -310,6 +511,44 @@ impl From for IndexStats { } } +/// Get stats of index +/// +/// Get the stats of an index.
+#[utoipa::path( + get, + path = "/{indexUid}/stats", + tag = "Stats", + security(("Bearer" = ["stats.get", "stats.*", "*"])), + params(("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false)), + responses( + (status = OK, description = "The stats of the index", body = IndexStats, content_type = "application/json", example = json!( + { + "numberOfDocuments": 10, + "isIndexing": true, + "fieldDistribution": { + "genre": 10, + "author": 9 + } + } + )), + (status = 404, description = "Index not found", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Index `movies` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn get_index_stats( index_scheduler: GuardedData, Data>, index_uid: web::Path, diff --git a/crates/meilisearch/src/routes/indexes/search.rs b/crates/meilisearch/src/routes/indexes/search.rs index 291193c4e..ca3c61753 100644 --- a/crates/meilisearch/src/routes/indexes/search.rs +++ b/crates/meilisearch/src/routes/indexes/search.rs @@ -12,6 +12,7 @@ use meilisearch_types::milli; use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; use tracing::debug; +use utoipa::{IntoParams, OpenApi}; use crate::analytics::Analytics; use crate::error::MeilisearchHttpError; @@ -22,12 +23,28 @@ use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST}; use crate::search::{ add_search_rules, perform_search, 
HybridQuery, MatchingStrategy, RankingScoreThreshold, - RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, + RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, }; use crate::search_queue::SearchQueue; +#[derive(OpenApi)] +#[openapi( + paths(search_with_url_query, search_with_post), + tags( + ( + name = "Search", + description = "Meilisearch exposes two routes to perform searches: + +- A POST route: this is the preferred route when using API authentication, as it allows [preflight request](https://developer.mozilla.org/en-US/docs/Glossary/Preflight_request) caching and better performance. +- A GET route: the usage of this route is discouraged, unless you have good reason to do otherwise (specific caching abilities for example)", + external_docs(url = "https://www.meilisearch.com/docs/reference/api/search"), + ), + ), +)] +pub struct SearchApi; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") @@ -36,30 +53,41 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } -#[derive(Debug, deserr::Deserr)] +#[derive(Debug, deserr::Deserr, IntoParams)] #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)] +#[into_params(rename_all = "camelCase", parameter_in = Query)] pub struct SearchQueryGet { #[deserr(default, error = DeserrQueryParamError)] q: Option, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = Vec, explode = false)] vector: Option>, #[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError)] + #[param(value_type = usize, default = DEFAULT_SEARCH_OFFSET)] offset: Param, #[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError)] + #[param(value_type = usize, default = DEFAULT_SEARCH_LIMIT)] limit: Param, #[deserr(default, 
error = DeserrQueryParamError)] + #[param(value_type = Option)] page: Option>, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = Option)] hits_per_page: Option>, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = Vec, explode = false)] attributes_to_retrieve: Option>, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = bool, default)] retrieve_vectors: Param, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = Vec, explode = false)] attributes_to_crop: Option>, #[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError)] + #[param(value_type = usize, default = DEFAULT_CROP_LENGTH)] crop_length: Param, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = Vec, explode = false)] attributes_to_highlight: Option>, #[deserr(default, error = DeserrQueryParamError)] filter: Option, @@ -68,30 +96,41 @@ pub struct SearchQueryGet { #[deserr(default, error = DeserrQueryParamError)] distinct: Option, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = bool)] show_matches_position: Param, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = bool)] show_ranking_score: Param, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = bool)] show_ranking_score_details: Param, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = Vec, explode = false)] facets: Option>, - #[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError)] + #[deserr(default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError)] + #[param(default = DEFAULT_HIGHLIGHT_PRE_TAG)] highlight_pre_tag: String, - #[deserr( default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError)] + #[deserr(default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError)] + #[param(default = DEFAULT_HIGHLIGHT_POST_TAG)] highlight_post_tag: String, #[deserr(default = DEFAULT_CROP_MARKER(), 
error = DeserrQueryParamError)] + #[param(default = DEFAULT_CROP_MARKER)] crop_marker: String, #[deserr(default, error = DeserrQueryParamError)] matching_strategy: MatchingStrategy, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = Vec, explode = false)] pub attributes_to_search_on: Option>, #[deserr(default, error = DeserrQueryParamError)] pub hybrid_embedder: Option, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = f32)] pub hybrid_semantic_ratio: Option, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = f32)] pub ranking_score_threshold: Option, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = Vec, explode = false)] pub locales: Option>, } @@ -220,6 +259,62 @@ pub fn fix_sort_query_parameters(sort_query: &str) -> Vec { sort_parameters } +/// Search an index with GET +/// +/// Search for documents matching a specific query in the given index. +#[utoipa::path( + get, + path = "/{indexUid}/search", + tags = ["Indexes", "Search"], + security(("Bearer" = ["search", "*"])), + params( + ("indexUid" = String, Path, example = "movies", description = "Index Unique Identifier", nullable = false), + SearchQueryGet + ), + responses( + (status = 200, description = "The documents are returned", body = SearchResult, content_type = "application/json", example = json!( + { + "hits": [ + { + "id": 2770, + "title": "American Pie 2", + "poster": "https://image.tmdb.org/t/p/w1280/q4LNgUnRfltxzp3gf1MAGiK5LhV.jpg", + "overview": "The whole gang are back and as close as ever. They decide to get even closer by spending the summer together at a beach house. They decide to hold the biggest…", + "release_date": 997405200 + }, + { + "id": 190859, + "title": "American Sniper", + "poster": "https://image.tmdb.org/t/p/w1280/svPHnYE7N5NAGO49dBmRhq0vDQ3.jpg", + "overview": "U.S. 
Navy SEAL Chris Kyle takes his sole mission—protect his comrades—to heart and becomes one of the most lethal snipers in American history. His pinpoint accuracy not only saves countless lives but also makes him a prime…", + "release_date": 1418256000 + } + ], + "offset": 0, + "limit": 2, + "estimatedTotalHits": 976, + "processingTimeMs": 35, + "query": "american " + } + )), + (status = 404, description = "Index not found", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Index `movies` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn search_with_url_query( index_scheduler: GuardedData, Data>, search_queue: web::Data, @@ -271,6 +366,62 @@ pub async fn search_with_url_query( Ok(HttpResponse::Ok().json(search_result)) } +/// Search with POST +/// +/// Search for documents matching a specific query in the given index. +#[utoipa::path( + post, + path = "/{indexUid}/search", + tags = ["Indexes", "Search"], + security(("Bearer" = ["search", "*"])), + params( + ("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false), + ), + request_body = SearchQuery, + responses( + (status = 200, description = "The documents are returned", body = SearchResult, content_type = "application/json", example = json!( + { + "hits": [ + { + "id": 2770, + "title": "American Pie 2", + "poster": "https://image.tmdb.org/t/p/w1280/q4LNgUnRfltxzp3gf1MAGiK5LhV.jpg", + "overview": "The whole gang are back and as close as ever. 
They decide to get even closer by spending the summer together at a beach house. They decide to hold the biggest…", + "release_date": 997405200 + }, + { + "id": 190859, + "title": "American Sniper", + "poster": "https://image.tmdb.org/t/p/w1280/svPHnYE7N5NAGO49dBmRhq0vDQ3.jpg", + "overview": "U.S. Navy SEAL Chris Kyle takes his sole mission—protect his comrades—to heart and becomes one of the most lethal snipers in American history. His pinpoint accuracy not only saves countless lives but also makes him a prime…", + "release_date": 1418256000 + } + ], + "offset": 0, + "limit": 2, + "estimatedTotalHits": 976, + "processingTimeMs": 35, + "query": "american " + } + )), + (status = 404, description = "Index not found", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Index `movies` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn search_with_post( index_scheduler: GuardedData, Data>, search_queue: web::Data, diff --git a/crates/meilisearch/src/routes/indexes/settings.rs b/crates/meilisearch/src/routes/indexes/settings.rs index b2922e5ff..e2138ee4d 100644 --- a/crates/meilisearch/src/routes/indexes/settings.rs +++ b/crates/meilisearch/src/routes/indexes/settings.rs @@ -6,9 +6,12 @@ use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::update::Setting; -use meilisearch_types::settings::{settings, SecretPolicy, Settings, Unchecked}; +use meilisearch_types::settings::{ + settings, SecretPolicy, SettingEmbeddingSettings, Settings, Unchecked, +}; use meilisearch_types::tasks::KindWithContent; use tracing::debug; +use utoipa::OpenApi; use super::settings_analytics::*; use crate::analytics::Analytics; @@ -29,6 +32,19 @@ macro_rules! make_setting_routes { make_setting_route!($route, $update_verb, $type, $err_ty, $attr, $camelcase_attr, $analytics); )* + #[derive(OpenApi)] + #[openapi( + paths(update_all, get_all, delete_all, $( $attr::get, $attr::update, $attr::delete,)*), + tags( + ( + name = "Settings", + description = "Use the /settings route to customize search settings for a given index. You can either modify all index settings at once using the update settings endpoint, or use a child route to configure a single setting.", + external_docs(url = "https://www.meilisearch.com/docs/reference/api/settings"), + ), + ), + )] + pub struct SettingsApi; + pub fn configure(cfg: &mut web::ServiceConfig) { use crate::extractors::sequential_extractor::SeqHandler; cfg.service( @@ -62,7 +78,39 @@ macro_rules! 
make_setting_route { use $crate::extractors::sequential_extractor::SeqHandler; use $crate::Opt; use $crate::routes::{is_dry_run, get_task_id, SummarizedTaskView}; + #[allow(unused_imports)] + use super::*; + #[utoipa::path( + delete, + path = concat!("{indexUid}/settings", $route), + tag = "Settings", + security(("Bearer" = ["settings.update", "settings.*", "*"])), + operation_id = concat!("delete", $camelcase_attr), + summary = concat!("Reset ", $camelcase_attr), + description = concat!("Reset an index's ", $camelcase_attr, " to its default value"), + params(("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false)), + request_body = $type, + responses( + (status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": "movies", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) + )] pub async fn delete( index_scheduler: GuardedData< ActionPolicy<{ actions::SETTINGS_UPDATE }>, @@ -96,6 +144,37 @@ macro_rules! 
make_setting_route { Ok(HttpResponse::Accepted().json(task)) } + + #[utoipa::path( + $update_verb, + path = concat!("{indexUid}/settings", $route), + tag = "Settings", + security(("Bearer" = ["settings.update", "settings.*", "*"])), + operation_id = concat!(stringify!($update_verb), $camelcase_attr), + summary = concat!("Update ", $camelcase_attr), + description = concat!("Update an index's user defined ", $camelcase_attr), + params(("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false)), + request_body = $type, + responses( + (status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": "movies", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) + )] pub async fn update( index_scheduler: GuardedData< ActionPolicy<{ actions::SETTINGS_UPDATE }>, @@ -151,6 +230,30 @@ macro_rules! 
make_setting_route { Ok(HttpResponse::Accepted().json(task)) } + + #[utoipa::path( + get, + path = concat!("{indexUid}/settings", $route), + tag = "Settings", + summary = concat!("Get ", $camelcase_attr), + description = concat!("Get an user defined ", $camelcase_attr), + security(("Bearer" = ["settings.get", "settings.*", "*"])), + operation_id = concat!("get", $camelcase_attr), + params(("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false)), + responses( + (status = 200, description = concat!($camelcase_attr, " is returned"), body = $type, content_type = "application/json", example = json!( + <$type>::default() + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) + )] pub async fn get( index_scheduler: GuardedData< ActionPolicy<{ actions::SETTINGS_GET }>, @@ -359,7 +462,7 @@ make_setting_routes!( { route: "/embedders", update_verb: patch, - value_type: std::collections::BTreeMap>, + value_type: std::collections::BTreeMap, err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders, >, @@ -402,6 +505,39 @@ make_setting_routes!( }, ); +#[utoipa::path( + patch, + path = "{indexUid}/settings", + tag = "Settings", + security(("Bearer" = ["settings.update", "settings.*", "*"])), + params(("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false)), + request_body = Settings, + responses( + (status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": "movies", + "status": "enqueued", + 
"type": "settingsUpdate", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] +/// Update settings +/// +/// Update the settings of an index. +/// Passing null to an index setting will reset it to its default value. +/// Updates in the settings route are partial. This means that any parameters not provided in the body will be left unchanged. +/// If the provided index does not exist, it will be created. pub async fn update_all( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -479,6 +615,29 @@ pub async fn update_all( Ok(HttpResponse::Accepted().json(task)) } +#[utoipa::path( + get, + path = "{indexUid}/settings", + tag = "Settings", + security(("Bearer" = ["settings.update", "settings.*", "*"])), + params(("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false)), + responses( + (status = 200, description = "Settings are returned", body = Settings, content_type = "application/json", example = json!( + Settings::::default() + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] +/// All settings +/// +/// This route allows you to retrieve, configure, or reset all of an index's settings at once. 
pub async fn get_all( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -492,6 +651,35 @@ pub async fn get_all( Ok(HttpResponse::Ok().json(new_settings)) } +#[utoipa::path( + delete, + path = "{indexUid}/settings", + tag = "Settings", + security(("Bearer" = ["settings.update", "settings.*", "*"])), + params(("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false)), + responses( + (status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": "movies", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] +/// Reset settings +/// +/// Reset all the settings of an index to their default value. 
pub async fn delete_all( index_scheduler: GuardedData, Data>, index_uid: web::Path, diff --git a/crates/meilisearch/src/routes/indexes/settings_analytics.rs b/crates/meilisearch/src/routes/indexes/settings_analytics.rs index ddca2c00a..ffeadcab6 100644 --- a/crates/meilisearch/src/routes/indexes/settings_analytics.rs +++ b/crates/meilisearch/src/routes/indexes/settings_analytics.rs @@ -8,10 +8,9 @@ use std::collections::{BTreeMap, BTreeSet, HashSet}; use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView}; use meilisearch_types::milli::update::Setting; -use meilisearch_types::milli::vector::settings::EmbeddingSettings; use meilisearch_types::settings::{ FacetingSettings, PaginationSettings, PrefixSearchSettings, ProximityPrecisionView, - RankingRuleView, TypoSettings, + RankingRuleView, SettingEmbeddingSettings, TypoSettings, }; use serde::Serialize; @@ -497,13 +496,13 @@ pub struct EmbeddersAnalytics { } impl EmbeddersAnalytics { - pub fn new(setting: Option<&BTreeMap>>) -> Self { + pub fn new(setting: Option<&BTreeMap>) -> Self { let mut sources = std::collections::HashSet::new(); if let Some(s) = &setting { for source in s .values() - .filter_map(|config| config.clone().set()) + .filter_map(|config| config.inner.clone().set()) .filter_map(|config| config.source.set()) { use meilisearch_types::milli::vector::settings::EmbedderSource; @@ -522,18 +521,18 @@ impl EmbeddersAnalytics { sources: Some(sources), document_template_used: setting.as_ref().map(|map| { map.values() - .filter_map(|config| config.clone().set()) + .filter_map(|config| config.inner.clone().set()) .any(|config| config.document_template.set().is_some()) }), document_template_max_bytes: setting.as_ref().and_then(|map| { map.values() - .filter_map(|config| config.clone().set()) + .filter_map(|config| config.inner.clone().set()) .filter_map(|config| config.document_template_max_bytes.set()) .max() }), binary_quantization_used: 
setting.as_ref().map(|map| { map.values() - .filter_map(|config| config.clone().set()) + .filter_map(|config| config.inner.clone().set()) .any(|config| config.binary_quantized.set().is_some()) }), } diff --git a/crates/meilisearch/src/routes/indexes/similar.rs b/crates/meilisearch/src/routes/indexes/similar.rs index f47771061..4e0673a7d 100644 --- a/crates/meilisearch/src/routes/indexes/similar.rs +++ b/crates/meilisearch/src/routes/indexes/similar.rs @@ -11,6 +11,7 @@ use meilisearch_types::keys::actions; use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; use tracing::debug; +use utoipa::{IntoParams, OpenApi}; use super::ActionPolicy; use crate::analytics::Analytics; @@ -22,6 +23,21 @@ use crate::search::{ SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, }; +#[derive(OpenApi)] +#[openapi( + paths(similar_get, similar_post), + tags( + ( + name = "Similar documents", + description = "The /similar route uses AI-powered search to return a number of documents similar to a target document. + +Meilisearch exposes two routes for retrieving similar documents: POST and GET. In the majority of cases, POST will offer better performance and ease of use.", + external_docs(url = "https://www.meilisearch.com/docs/reference/api/similar"), + ), + ), +)] +pub struct SimilarApi; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") @@ -30,6 +46,62 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } +/// Get similar documents with GET +/// +/// Retrieve documents similar to a specific search result. 
+#[utoipa::path( + get, + path = "{indexUid}/similar", + tag = "Similar documents", + security(("Bearer" = ["search", "*"])), + params( + ("indexUid" = String, Path, example = "movies", description = "Index Unique Identifier", nullable = false), + SimilarQueryGet + ), + responses( + (status = 200, description = "The documents are returned", body = SimilarResult, content_type = "application/json", example = json!( + { + "hits": [ + { + "id": 2770, + "title": "American Pie 2", + "poster": "https://image.tmdb.org/t/p/w1280/q4LNgUnRfltxzp3gf1MAGiK5LhV.jpg", + "overview": "The whole gang are back and as close as ever. They decide to get even closer by spending the summer together at a beach house. They decide to hold the biggest…", + "release_date": 997405200 + }, + { + "id": 190859, + "title": "American Sniper", + "poster": "https://image.tmdb.org/t/p/w1280/svPHnYE7N5NAGO49dBmRhq0vDQ3.jpg", + "overview": "U.S. Navy SEAL Chris Kyle takes his sole mission—protect his comrades—to heart and becomes one of the most lethal snipers in American history. His pinpoint accuracy not only saves countless lives but also makes him a prime…", + "release_date": 1418256000 + } + ], + "offset": 0, + "limit": 2, + "estimatedTotalHits": 976, + "processingTimeMs": 35, + "query": "american " + } + )), + (status = 404, description = "Index not found", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Index `movies` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn similar_get( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -58,6 +130,60 @@ pub async fn similar_get( Ok(HttpResponse::Ok().json(similar)) } +/// Get similar documents with POST +/// +/// Retrieve documents similar to a specific search result. +#[utoipa::path( + post, + path = "{indexUid}/similar", + tag = "Similar documents", + security(("Bearer" = ["search", "*"])), + params(("indexUid" = String, Path, example = "movies", description = "Index Unique Identifier", nullable = false)), + request_body = SimilarQuery, + responses( + (status = 200, description = "The documents are returned", body = SimilarResult, content_type = "application/json", example = json!( + { + "hits": [ + { + "id": 2770, + "title": "American Pie 2", + "poster": "https://image.tmdb.org/t/p/w1280/q4LNgUnRfltxzp3gf1MAGiK5LhV.jpg", + "overview": "The whole gang are back and as close as ever. They decide to get even closer by spending the summer together at a beach house. They decide to hold the biggest…", + "release_date": 997405200 + }, + { + "id": 190859, + "title": "American Sniper", + "poster": "https://image.tmdb.org/t/p/w1280/svPHnYE7N5NAGO49dBmRhq0vDQ3.jpg", + "overview": "U.S. Navy SEAL Chris Kyle takes his sole mission—protect his comrades—to heart and becomes one of the most lethal snipers in American history. 
His pinpoint accuracy not only saves countless lives but also makes him a prime…", + "release_date": 1418256000 + } + ], + "offset": 0, + "limit": 2, + "estimatedTotalHits": 976, + "processingTimeMs": 35, + "query": "american " + } + )), + (status = 404, description = "Index not found", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Index `movies` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn similar_post( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -125,26 +251,35 @@ async fn similar( .await? 
} -#[derive(Debug, deserr::Deserr)] +#[derive(Debug, deserr::Deserr, IntoParams)] #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)] +#[into_params(parameter_in = Query)] pub struct SimilarQueryGet { #[deserr(error = DeserrQueryParamError)] + #[param(value_type = String)] id: Param, #[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError)] + #[param(value_type = usize, default = DEFAULT_SEARCH_OFFSET)] offset: Param, #[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError)] + #[param(value_type = usize, default = DEFAULT_SEARCH_LIMIT)] limit: Param, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = Vec)] attributes_to_retrieve: Option>, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = bool, default)] retrieve_vectors: Param, #[deserr(default, error = DeserrQueryParamError)] filter: Option, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = bool, default)] show_ranking_score: Param, #[deserr(default, error = DeserrQueryParamError)] + #[param(value_type = bool, default)] show_ranking_score_details: Param, #[deserr(default, error = DeserrQueryParamError, default)] + #[param(value_type = Option)] pub ranking_score_threshold: Option, #[deserr(error = DeserrQueryParamError)] pub embedder: String, diff --git a/crates/meilisearch/src/routes/logs.rs b/crates/meilisearch/src/routes/logs.rs index 57e2cbd22..889ce824e 100644 --- a/crates/meilisearch/src/routes/logs.rs +++ b/crates/meilisearch/src/routes/logs.rs @@ -14,9 +14,11 @@ use index_scheduler::IndexScheduler; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::{Code, ResponseError}; +use serde::Serialize; use tokio::sync::mpsc; use tracing_subscriber::filter::Targets; use tracing_subscriber::Layer; +use utoipa::{OpenApi, ToSchema}; use crate::error::MeilisearchHttpError; use 
crate::extractors::authentication::policies::*; @@ -24,6 +26,18 @@ use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::{LogRouteHandle, LogStderrHandle}; +#[derive(OpenApi)] +#[openapi( + paths(get_logs, cancel_logs, update_stderr_target), + tags(( + name = "Logs", + description = "Everything about retrieving or customizing logs. +Currently [experimental](https://www.meilisearch.com/docs/learn/experimental/overview).", + external_docs(url = "https://www.meilisearch.com/docs/learn/experimental/log_customization"), + )), +)] +pub struct LogsApi; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("stream") @@ -33,12 +47,16 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::resource("stderr").route(web::post().to(SeqHandler(update_stderr_target)))); } -#[derive(Debug, Default, Clone, Copy, Deserr, PartialEq, Eq)] +#[derive(Debug, Default, Clone, Copy, Deserr, Serialize, PartialEq, Eq, ToSchema)] #[deserr(rename_all = camelCase)] +#[schema(rename_all = "camelCase")] pub enum LogMode { + /// Output the logs in a human readable form. #[default] Human, + /// Output the logs in json. Json, + /// Output the logs in the firefox profiler format. They can then be loaded and visualized at https://profiler.firefox.com/ Profile, } @@ -83,16 +101,26 @@ impl MergeWithError for DeserrJsonError { } } -#[derive(Debug, Deserr)] +#[derive(Debug, Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields, validate = validate_get_logs -> DeserrJsonError)] +#[schema(rename_all = "camelCase")] pub struct GetLogs { + /// Lets you specify which parts of the code you want to inspect and is formatted like that: code_part=log_level,code_part=log_level + /// - If the `code_part` is missing, then the `log_level` will be applied to everything. + /// - If the `log_level` is missing, then the `code_part` will be selected in `info` log level. 
#[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError)] + #[schema(value_type = String, default = "info", example = json!("milli=trace,index_scheduler,actix_web=off"))] target: MyTargets, + /// Lets you customize the format of the logs. #[deserr(default, error = DeserrJsonError)] + #[schema(default = LogMode::default)] mode: LogMode, + /// A boolean to indicate if you want to profile the memory as well. This is only useful while using the `profile` mode. + /// Be cautious, though; it slows down the engine a lot. #[deserr(default = false, error = DeserrJsonError)] + #[schema(default = false)] profile_memory: bool, } @@ -248,6 +276,46 @@ fn entry_stream( ) } +/// Retrieve logs +/// +/// Stream logs over HTTP. The format of the logs depends on the configuration specified in the payload. +/// The logs are sent as multi-part, and the stream never stops, so make sure your clients correctly handle that. +/// To make the server stop sending you logs, you can call the `DELETE /logs/stream` route. +/// +/// There can only be one listener at a time, and an error will be returned if you call this route while it's being used by another client. +#[utoipa::path( + post, + path = "/stream", + tag = "Logs", + security(("Bearer" = ["metrics.get", "metrics.*", "*"])), + request_body = GetLogs, + responses( + (status = OK, description = "Logs are being returned", body = String, content_type = "application/json", example = json!( + r#" +2024-10-08T13:35:02.643750Z WARN HTTP request{method=GET host="localhost:7700" route=/metrics query_parameters= user_agent=HTTPie/3.2.3 status_code=400 error=Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/product/discussions/625}: tracing_actix_web::middleware: Error encountered while processing the incoming HTTP request: ResponseError { code: 400, message: "Getting metrics requires enabling the `metrics` experimental feature.
See https://github.com/meilisearch/product/discussions/625", error_code: "feature_not_enabled", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#feature_not_enabled" } +2024-10-08T13:35:02.644191Z INFO HTTP request{method=GET host="localhost:7700" route=/metrics query_parameters= user_agent=HTTPie/3.2.3 status_code=400 error=Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/product/discussions/625}: meilisearch: close time.busy=1.66ms time.idle=658µs +2024-10-08T13:35:18.564152Z INFO HTTP request{method=PATCH host="localhost:7700" route=/experimental-features query_parameters= user_agent=curl/8.6.0 status_code=200}: meilisearch: close time.busy=1.17ms time.idle=127µs +2024-10-08T13:35:23.094987Z INFO HTTP request{method=GET host="localhost:7700" route=/metrics query_parameters= user_agent=HTTPie/3.2.3 status_code=200}: meilisearch: close time.busy=2.12ms time.idle=595µs +"# + )), + (status = 400, description = "The route is already being used", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The `/logs/stream` route is currently in use by someone else.", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn get_logs( index_scheduler: GuardedData, Data>, logs: Data, @@ -280,6 +348,26 @@ pub async fn get_logs( } } +/// Stop retrieving logs +/// +/// Call this route to make the engine stop sending logs through the `POST /logs/stream` route.
+#[utoipa::path( + delete, + path = "/stream", + tag = "Logs", + security(("Bearer" = ["metrics.get", "metrics.*", "*"])), + responses( + (status = NO_CONTENT, description = "Logs are being returned"), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn cancel_logs( index_scheduler: GuardedData, Data>, logs: Data, @@ -293,13 +381,38 @@ pub async fn cancel_logs( Ok(HttpResponse::NoContent().finish()) } -#[derive(Debug, Deserr)] +#[derive(Debug, Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct UpdateStderrLogs { + /// Lets you specify which parts of the code you want to inspect and is formatted like that: code_part=log_level,code_part=log_level + /// - If the `code_part` is missing, then the `log_level` will be applied to everything. + /// - If the `log_level` is missing, then the `code_part` will be selected in `info` log level. #[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError)] + #[schema(value_type = String, default = "info", example = json!("milli=trace,index_scheduler,actix_web=off"))] target: MyTargets, } +/// Update target of the console logs +/// +/// This route lets you specify at runtime the level of the console logs outputted on stderr. 
+#[utoipa::path( + post, + path = "/stderr", + tag = "Logs", + request_body = UpdateStderrLogs, + security(("Bearer" = ["metrics.get", "metrics.*", "*"])), + responses( + (status = NO_CONTENT, description = "The console logs have been updated"), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn update_stderr_target( index_scheduler: GuardedData, Data>, logs: Data, diff --git a/crates/meilisearch/src/routes/metrics.rs b/crates/meilisearch/src/routes/metrics.rs index 7dd9ee3bb..6e93284c2 100644 --- a/crates/meilisearch/src/routes/metrics.rs +++ b/crates/meilisearch/src/routes/metrics.rs @@ -1,7 +1,3 @@ -use crate::extractors::authentication::policies::ActionPolicy; -use crate::extractors::authentication::{AuthenticationError, GuardedData}; -use crate::routes::create_all_stats; -use crate::search_queue::SearchQueue; use actix_web::http::header; use actix_web::web::{self, Data}; use actix_web::HttpResponse; @@ -12,11 +8,107 @@ use meilisearch_types::keys::actions; use meilisearch_types::tasks::Status; use prometheus::{Encoder, TextEncoder}; use time::OffsetDateTime; +use utoipa::OpenApi; + +use crate::extractors::authentication::policies::ActionPolicy; +use crate::extractors::authentication::{AuthenticationError, GuardedData}; +use crate::routes::create_all_stats; +use crate::search_queue::SearchQueue; + +#[derive(OpenApi)] +#[openapi(paths(get_metrics))] +pub struct MetricApi; pub fn configure(config: &mut web::ServiceConfig) { config.service(web::resource("").route(web::get().to(get_metrics))); } +/// Get prometheus metrics +/// +/// Retrieve metrics on the engine. 
See https://www.meilisearch.com/docs/learn/experimental/metrics +/// Currently, [the feature is experimental](https://www.meilisearch.com/docs/learn/experimental/overview) +/// which means it must be enabled. +#[utoipa::path( + get, + path = "", + tag = "Stats", + security(("Bearer" = ["metrics.get", "metrics.*", "*"])), + responses( + (status = 200, description = "The metrics of the instance", body = String, content_type = "text/plain", example = json!( + r#" +# HELP meilisearch_db_size_bytes Meilisearch DB Size In Bytes +# TYPE meilisearch_db_size_bytes gauge +meilisearch_db_size_bytes 1130496 +# HELP meilisearch_http_requests_total Meilisearch HTTP requests total +# TYPE meilisearch_http_requests_total counter +meilisearch_http_requests_total{method="GET",path="/metrics",status="400"} 1 +meilisearch_http_requests_total{method="PATCH",path="/experimental-features",status="200"} 1 +# HELP meilisearch_http_response_time_seconds Meilisearch HTTP response times +# TYPE meilisearch_http_response_time_seconds histogram +meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="0.005"} 0 +meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="0.01"} 0 +meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="0.025"} 0 +meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="0.05"} 0 +meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="0.075"} 0 +meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="0.1"} 0 +meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="0.25"} 0 +meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="0.5"} 0 +meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="0.75"} 0 +meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="1"} 0 
+meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="2.5"} 0 +meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="5"} 0 +meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="7.5"} 0 +meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="10"} 0 +meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="+Inf"} 0 +meilisearch_http_response_time_seconds_sum{method="GET",path="/metrics"} 0 +meilisearch_http_response_time_seconds_count{method="GET",path="/metrics"} 0 +# HELP meilisearch_index_count Meilisearch Index Count +# TYPE meilisearch_index_count gauge +meilisearch_index_count 1 +# HELP meilisearch_index_docs_count Meilisearch Index Docs Count +# TYPE meilisearch_index_docs_count gauge +meilisearch_index_docs_count{index="mieli"} 2 +# HELP meilisearch_is_indexing Meilisearch Is Indexing +# TYPE meilisearch_is_indexing gauge +meilisearch_is_indexing 0 +# HELP meilisearch_last_update Meilisearch Last Update +# TYPE meilisearch_last_update gauge +meilisearch_last_update 1726675964 +# HELP meilisearch_nb_tasks Meilisearch Number of tasks +# TYPE meilisearch_nb_tasks gauge +meilisearch_nb_tasks{kind="indexes",value="mieli"} 39 +meilisearch_nb_tasks{kind="statuses",value="canceled"} 0 +meilisearch_nb_tasks{kind="statuses",value="enqueued"} 0 +meilisearch_nb_tasks{kind="statuses",value="failed"} 4 +meilisearch_nb_tasks{kind="statuses",value="processing"} 0 +meilisearch_nb_tasks{kind="statuses",value="succeeded"} 35 +meilisearch_nb_tasks{kind="types",value="documentAdditionOrUpdate"} 9 +meilisearch_nb_tasks{kind="types",value="documentDeletion"} 0 +meilisearch_nb_tasks{kind="types",value="documentEdition"} 0 +meilisearch_nb_tasks{kind="types",value="dumpCreation"} 0 +meilisearch_nb_tasks{kind="types",value="indexCreation"} 0 +meilisearch_nb_tasks{kind="types",value="indexDeletion"} 8 +meilisearch_nb_tasks{kind="types",value="indexSwap"} 0 
+meilisearch_nb_tasks{kind="types",value="indexUpdate"} 0 +meilisearch_nb_tasks{kind="types",value="settingsUpdate"} 22 +meilisearch_nb_tasks{kind="types",value="snapshotCreation"} 0 +meilisearch_nb_tasks{kind="types",value="taskCancelation"} 0 +meilisearch_nb_tasks{kind="types",value="taskDeletion"} 0 +# HELP meilisearch_used_db_size_bytes Meilisearch Used DB Size In Bytes +# TYPE meilisearch_used_db_size_bytes gauge +meilisearch_used_db_size_bytes 409600 +"# + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn get_metrics( index_scheduler: GuardedData, Data>, auth_controller: Data, @@ -64,7 +156,7 @@ pub async fn get_metrics( let task_queue_latency_seconds = index_scheduler .get_tasks_from_authorized_indexes( - Query { + &Query { limit: Some(1), reverse: Some(true), statuses: Some(vec![Status::Enqueued, Status::Processing]), diff --git a/crates/meilisearch/src/routes/mod.rs b/crates/meilisearch/src/routes/mod.rs index 91237b707..3dcefdf46 100644 --- a/crates/meilisearch/src/routes/mod.rs +++ b/crates/meilisearch/src/routes/mod.rs @@ -4,19 +4,46 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use index_scheduler::IndexScheduler; use meilisearch_auth::AuthController; -use meilisearch_types::error::{Code, ResponseError}; -use meilisearch_types::settings::{Settings, Unchecked}; +use meilisearch_types::batch_view::BatchView; +use meilisearch_types::batches::BatchStats; +use meilisearch_types::error::{Code, ErrorType, ResponseError}; +use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::keys::CreateApiKey; +use meilisearch_types::settings::{ + Checked, 
FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, Settings, TypoSettings, + Unchecked, +}; +use meilisearch_types::task_view::{DetailsView, TaskView}; use meilisearch_types::tasks::{Kind, Status, Task, TaskId}; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; use tracing::debug; +use utoipa::{OpenApi, ToSchema}; +use self::api_key::KeyView; +use self::indexes::documents::BrowseQuery; +use self::indexes::{IndexCreateRequest, IndexStats, UpdateIndexRequest}; +use self::logs::{GetLogs, LogMode, UpdateStderrLogs}; +use self::open_api_utils::OpenApiAuth; +use self::tasks::AllTasks; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; +use crate::milli::progress::{ProgressStepView, ProgressView}; +use crate::routes::batches::AllBatches; +use crate::routes::features::RuntimeTogglableFeatures; +use crate::routes::indexes::documents::{DocumentDeletionByFilter, DocumentEditionByFunction}; +use crate::routes::indexes::IndexView; +use crate::routes::multi_search::SearchResults; +use crate::routes::swap_indexes::SwapIndexesPayload; +use crate::search::{ + FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, + SearchQueryWithIndex, SearchResultWithIndex, SimilarQuery, SimilarResult, +}; use crate::search_queue::SearchQueue; use crate::Opt; const PAGINATION_DEFAULT_LIMIT: usize = 20; +const PAGINATION_DEFAULT_LIMIT_FN: fn() -> usize = || 20; mod api_key; pub mod batches; @@ -27,10 +54,41 @@ mod logs; mod metrics; mod multi_search; mod multi_search_analytics; +mod open_api_utils; mod snapshot; mod swap_indexes; pub mod tasks; +#[derive(OpenApi)] +#[openapi( + nest( + (path = "/tasks", api = tasks::TaskApi), + (path = "/batches", api = batches::BatchesApi), + (path = "/indexes", api = indexes::IndexesApi), + // We must stop the search path here because the rest must be configured by each route individually + (path = "/indexes", api = indexes::search::SearchApi), + (path = 
"/snapshots", api = snapshot::SnapshotApi), + (path = "/dumps", api = dump::DumpApi), + (path = "/keys", api = api_key::ApiKeyApi), + (path = "/metrics", api = metrics::MetricApi), + (path = "/logs", api = logs::LogsApi), + (path = "/multi-search", api = multi_search::MultiSearchApi), + (path = "/swap-indexes", api = swap_indexes::SwapIndexesApi), + (path = "/experimental-features", api = features::ExperimentalFeaturesApi), + ), + paths(get_health, get_version, get_stats), + tags( + (name = "Stats", description = "Stats gives extended information and metrics about indexes and the Meilisearch database."), + ), + modifiers(&OpenApiAuth), + servers(( + url = "/", + description = "Local server", + )), + components(schemas(PaginationView, PaginationView, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings, Settings, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind)) +)] +pub struct MeilisearchApi; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::scope("/tasks").configure(tasks::configure)) .service(web::scope("/batches").configure(batches::configure)) @@ -46,6 +104,13 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::scope("/swap-indexes").configure(swap_indexes::configure)) .service(web::scope("/metrics").configure(metrics::configure)) 
.service(web::scope("/experimental-features").configure(features::configure)); + + #[cfg(feature = "swagger")] + { + use utoipa_scalar::{Scalar, Servable as ScalarServable}; + let openapi = MeilisearchApi::openapi(); + cfg.service(Scalar::with_url("/scalar", openapi.clone())); + } } pub fn get_task_id(req: &HttpRequest, opt: &Opt) -> Result, ResponseError> { @@ -98,14 +163,20 @@ pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result { .map_or(false, |s| s.to_lowercase() == "true")) } -#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct SummarizedTaskView { + /// The task unique identifier. + #[schema(value_type = u32)] task_uid: TaskId, + /// The index affected by this task. May be `null` if the task is not linked to any index. index_uid: Option, + /// The status of the task. status: Status, + /// The type of the task. #[serde(rename = "type")] kind: Kind, + /// The date on which the task was enqueued. #[serde(serialize_with = "time::serde::rfc3339::serialize")] enqueued_at: OffsetDateTime, } @@ -127,7 +198,9 @@ pub struct Pagination { pub limit: usize, } -#[derive(Debug, Clone, Serialize)] +#[derive(Debug, Clone, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] pub struct PaginationView { pub results: Vec, pub offset: usize, @@ -283,17 +356,56 @@ pub async fn running() -> HttpResponse { HttpResponse::Ok().json(serde_json::json!({ "status": "Meilisearch is running" })) } -#[derive(Serialize, Debug)] +#[derive(Serialize, Debug, ToSchema)] #[serde(rename_all = "camelCase")] pub struct Stats { + /// The size of the database, in bytes. pub database_size: u64, #[serde(skip)] pub used_database_size: u64, + /// The date of the last update in the RFC 3339 format. Can be `null` if no update has ever been processed.
#[serde(serialize_with = "time::serde::rfc3339::option::serialize")] pub last_update: Option, + /// The stats of every individual index your API key lets you access. + #[schema(value_type = HashMap)] pub indexes: BTreeMap, } +/// Get stats of all indexes. +/// +/// Get stats of all indexes. +#[utoipa::path( + get, + path = "/stats", + tag = "Stats", + security(("Bearer" = ["stats.get", "stats.*", "*"])), + responses( + (status = 200, description = "The stats of the instance", body = Stats, content_type = "application/json", example = json!( + { + "databaseSize": 567, + "lastUpdate": "2019-11-20T09:40:33.711324Z", + "indexes": { + "movies": { + "numberOfDocuments": 10, + "isIndexing": true, + "fieldDistribution": { + "genre": 10, + "author": 9 + } + } + } + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] async fn get_stats( index_scheduler: GuardedData, Data>, auth_controller: GuardedData, Data>, @@ -343,14 +455,43 @@ pub fn create_all_stats( Ok(stats) } -#[derive(Serialize)] +#[derive(Serialize, ToSchema)] #[serde(rename_all = "camelCase")] struct VersionResponse { + /// The commit used to compile this build of Meilisearch. commit_sha: String, + /// The date of this build. commit_date: String, + /// The version of Meilisearch. pkg_version: String, } +/// Get version +/// +/// Current version of Meilisearch. 
+#[utoipa::path( + get, + path = "/version", + tag = "Version", + security(("Bearer" = ["version", "*"])), + responses( + (status = 200, description = "Instance is healthy", body = VersionResponse, content_type = "application/json", example = json!( + { + "commitSha": "b46889b5f0f2f8b91438a08a358ba8f05fc09fc1", + "commitDate": "2021-07-08", + "pkgVersion": "0.23.0" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] async fn get_version( _index_scheduler: GuardedData, Data>, ) -> HttpResponse { @@ -370,6 +511,35 @@ async fn get_version( }) } +#[derive(Default, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +struct HealthResponse { + /// The status of the instance. + status: HealthStatus, +} + +#[derive(Default, Serialize, ToSchema)] +#[serde(rename_all = "camelCase")] +enum HealthStatus { + #[default] + Available, +} + +/// Get Health +/// +/// The health check endpoint enables you to periodically test the health of your Meilisearch instance. 
+#[utoipa::path( + get, + path = "/health", + tag = "Health", + responses( + (status = 200, description = "Instance is healthy", body = HealthResponse, content_type = "application/json", example = json!( + { + "status": "available" + } + )), + ) +)] pub async fn get_health( index_scheduler: Data, auth_controller: Data, @@ -379,5 +549,5 @@ pub async fn get_health( index_scheduler.health().unwrap(); auth_controller.health().unwrap(); - Ok(HttpResponse::Ok().json(serde_json::json!({ "status": "available" }))) + Ok(HttpResponse::Ok().json(HealthResponse::default())) } diff --git a/crates/meilisearch/src/routes/multi_search.rs b/crates/meilisearch/src/routes/multi_search.rs index a2db0b22b..495b3c99c 100644 --- a/crates/meilisearch/src/routes/multi_search.rs +++ b/crates/meilisearch/src/routes/multi_search.rs @@ -8,6 +8,7 @@ use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; use serde::Serialize; use tracing::debug; +use utoipa::{OpenApi, ToSchema}; use super::multi_search_analytics::MultiSearchAggregator; use crate::analytics::Analytics; @@ -17,20 +18,127 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::indexes::search::search_kind; use crate::search::{ - add_search_rules, perform_federated_search, perform_search, FederatedSearch, RetrieveVectors, - SearchQueryWithIndex, SearchResultWithIndex, + add_search_rules, perform_federated_search, perform_search, FederatedSearch, + FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex, }; use crate::search_queue::SearchQueue; +#[derive(OpenApi)] +#[openapi( + paths(multi_search_with_post), + tags(( + name = "Multi-search", + description = "The `/multi-search` route allows you to perform multiple search queries on one or more indexes by bundling them into a single HTTP request. 
Multi-search is also known as federated search.", + external_docs(url = "https://www.meilisearch.com/docs/reference/api/multi_search"), + )), +)] +pub struct MultiSearchApi; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(multi_search_with_post)))); } -#[derive(Serialize)] -struct SearchResults { +#[derive(Serialize, ToSchema)] +pub struct SearchResults { results: Vec, } +/// Perform a multi-search +/// +/// Bundle multiple search queries in a single API request. Use this endpoint to search through multiple indexes at once. +#[utoipa::path( + post, + path = "", + tag = "Multi-search", + security(("Bearer" = ["search", "*"])), + responses( + (status = OK, description = "Non federated multi-search", body = SearchResults, content_type = "application/json", example = json!( + { + "results":[ + { + "indexUid":"movies", + "hits":[ + { + "id":13682, + "title":"Pooh's Heffalump Movie", + }, + ], + "query":"pooh", + "processingTimeMs":26, + "limit":1, + "offset":0, + "estimatedTotalHits":22 + }, + { + "indexUid":"movies", + "hits":[ + { + "id":12, + "title":"Finding Nemo", + }, + ], + "query":"nemo", + "processingTimeMs":5, + "limit":1, + "offset":0, + "estimatedTotalHits":11 + }, + { + "indexUid":"movie_ratings", + "hits":[ + { + "id":"Us", + "director": "Jordan Peele", + } + ], + "query":"Us", + "processingTimeMs":0, + "limit":1, + "offset":0, + "estimatedTotalHits":1 + } + ] + } + )), + (status = OK, description = "Federated multi-search", body = FederatedSearchResult, content_type = "application/json", example = json!( + { + "hits": [ + { + "id": 42, + "title": "Batman returns", + "overview": "The overview of batman returns", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0 + } + }, + { + "comicsId": "batman-killing-joke", + "description": "This comic is really awesome", + "title": "Batman: the killing joke", + "_federation": { + "indexUid": "comics", + "queriesPosition": 1 + } + }, + ], + 
"processingTimeMs": 0, + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2, + "semanticHitCount": 0 + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn multi_search_with_post( index_scheduler: GuardedData, Data>, search_queue: Data, diff --git a/crates/meilisearch/src/routes/open_api_utils.rs b/crates/meilisearch/src/routes/open_api_utils.rs new file mode 100644 index 000000000..89a3ef76a --- /dev/null +++ b/crates/meilisearch/src/routes/open_api_utils.rs @@ -0,0 +1,24 @@ +use serde::Serialize; +use utoipa::openapi::security::{HttpAuthScheme, HttpBuilder, SecurityScheme}; + +#[derive(Debug, Serialize)] +pub struct OpenApiAuth; + +impl utoipa::Modify for OpenApiAuth { + fn modify(&self, openapi: &mut utoipa::openapi::OpenApi) { + if let Some(schema) = openapi.components.as_mut() { + schema.add_security_scheme( + "Bearer", + SecurityScheme::Http( + HttpBuilder::new() + .scheme(HttpAuthScheme::Bearer) + .bearer_format("Uuidv4, string or JWT") + .description(Some( +"An API key is a token that you provide when making API calls. Include the token in a header parameter called `Authorization`. 
+Example: `Authorization: Bearer 8fece4405662dd830e4cb265e7e047aab2e79672a760a12712d2a263c9003509`")) + .build(), + ), + ); + } + } +} diff --git a/crates/meilisearch/src/routes/snapshot.rs b/crates/meilisearch/src/routes/snapshot.rs index cacbc41af..de7ecc37f 100644 --- a/crates/meilisearch/src/routes/snapshot.rs +++ b/crates/meilisearch/src/routes/snapshot.rs @@ -4,6 +4,7 @@ use index_scheduler::IndexScheduler; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; use tracing::debug; +use utoipa::OpenApi; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; @@ -12,12 +13,55 @@ use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::Opt; +#[derive(OpenApi)] +#[openapi( + paths(create_snapshot), + tags(( + name = "Snapshots", + description = "The snapshots route allows the creation of database snapshots. Snapshots are .snapshot files that can be used to launch Meilisearch. +Creating a snapshot is also referred to as exporting it, whereas launching Meilisearch with a snapshot is referred to as importing it. +During a snapshot export, all indexes of the current instance are exported—together with their documents and settings—and saved as a single .snapshot file. +During a snapshot import, all indexes contained in the indicated .snapshot file are imported along with their associated documents and settings. +Snapshot imports are performed at launch using an option.", + external_docs(url = "https://www.meilisearch.com/docs/reference/api/snapshots"), + )), +)] +pub struct SnapshotApi; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot)))); } crate::empty_analytics!(SnapshotAnalytics, "Snapshot Created"); +/// Create a snapshot +/// +/// Triggers a snapshot creation process. 
Once the process is complete, a snapshot is created in the snapshot directory. If the snapshot directory does not exist yet, it will be created. +#[utoipa::path( + post, + path = "", + tag = "Snapshots", + security(("Bearer" = ["snapshots.create", "snapshots.*", "*"])), + responses( + (status = 202, description = "Snapshot is being created", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 0, + "indexUid": null, + "status": "enqueued", + "type": "snapshotCreation", + "enqueuedAt": "2021-01-01T09:39:00.000000Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn create_snapshot( index_scheduler: GuardedData, Data>, req: HttpRequest, diff --git a/crates/meilisearch/src/routes/swap_indexes.rs b/crates/meilisearch/src/routes/swap_indexes.rs index 9b8b67e63..4a35d1a6d 100644 --- a/crates/meilisearch/src/routes/swap_indexes.rs +++ b/crates/meilisearch/src/routes/swap_indexes.rs @@ -9,6 +9,7 @@ use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::tasks::{IndexSwap, KindWithContent}; use serde::Serialize; +use utoipa::{OpenApi, ToSchema}; use super::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::analytics::{Aggregate, Analytics}; @@ -18,13 +19,18 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; use crate::Opt; +#[derive(OpenApi)] +#[openapi(paths(swap_indexes))] +pub struct SwapIndexesApi; + pub fn configure(cfg: &mut web::ServiceConfig) { 
cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes)))); } -#[derive(Deserr, Debug, Clone, PartialEq, Eq)] +#[derive(Deserr, Debug, Clone, PartialEq, Eq, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct SwapIndexesPayload { + /// Array of the two indexUids to be swapped #[deserr(error = DeserrJsonError, missing_field_error = DeserrJsonError::missing_swap_indexes)] indexes: Vec, } @@ -50,6 +56,37 @@ impl Aggregate for IndexSwappedAnalytics { } } +/// Swap indexes +/// +/// Swap the documents, settings, and task history of two or more indexes. You can only swap indexes in pairs. However, a single request can swap as many index pairs as you wish. +/// Swapping indexes is an atomic transaction: either all indexes are successfully swapped, or none are. +/// Swapping indexA and indexB will also replace every mention of indexA by indexB and vice-versa in the task history. enqueued tasks are left unmodified. +#[utoipa::path( + post, + path = "", + tag = "Indexes", + security(("Bearer" = ["search", "*"])), + request_body = Vec, + responses( + (status = OK, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 3, + "indexUid": null, + "status": "enqueued", + "type": "indexSwap", + "enqueuedAt": "2021-08-12T10:00:00.000000Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] pub async fn swap_indexes( index_scheduler: GuardedData, Data>, params: AwebJson, DeserrJsonError>, diff --git a/crates/meilisearch/src/routes/tasks.rs b/crates/meilisearch/src/routes/tasks.rs index cd82a6a18..fce2bc8bf 100644 --- a/crates/meilisearch/src/routes/tasks.rs +++ b/crates/meilisearch/src/routes/tasks.rs @@ -17,6 +17,7 @@ use time::format_description::well_known::Rfc3339; use time::macros::format_description; use time::{Date, Duration, OffsetDateTime, Time}; use tokio::task; +use utoipa::{IntoParams, OpenApi, ToSchema}; use super::{get_task_id, is_dry_run, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; use crate::analytics::{Aggregate, AggregateMethod, Analytics}; @@ -25,6 +26,17 @@ use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::{aggregate_methods, Opt}; +#[derive(OpenApi)] +#[openapi( + paths(get_tasks, delete_tasks, cancel_tasks, get_task), + tags(( + name = "Tasks", + description = "The tasks route gives information about the progress of the [asynchronous operations](https://docs.meilisearch.com/learn/advanced/asynchronous_operations.html).", + external_docs(url = "https://www.meilisearch.com/docs/reference/api/tasks"), + )), +)] +pub struct TaskApi; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") @@ -35,41 +47,72 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task)))); } -#[derive(Debug, Deserr)] +#[derive(Debug, Deserr, IntoParams)] #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)] +#[into_params(rename_all = "camelCase", parameter_in = Query)] pub struct TasksFilterQuery { + /// Maximum number of results to return. 
#[deserr(default = Param(PAGINATION_DEFAULT_LIMIT as u32), error = DeserrQueryParamError)] + #[param(required = false, value_type = u32, example = 12, default = json!(PAGINATION_DEFAULT_LIMIT))] pub limit: Param, + /// Fetch the next set of results from the given uid. #[deserr(default, error = DeserrQueryParamError)] + #[param(required = false, value_type = Option, example = 12421)] pub from: Option>, + /// The order you want to retrieve the objects. #[deserr(default, error = DeserrQueryParamError)] + #[param(required = false, value_type = Option, example = true)] pub reverse: Option>, + /// Permits to filter tasks by their batch uid. By default, when the `batchUids` query parameter is not set, all task uids are returned. It's possible to specify several batch uids by separating them with the `,` character. #[deserr(default, error = DeserrQueryParamError)] + #[param(required = false, value_type = Option, example = 12421)] pub batch_uids: OptionStarOrList, + /// Permits to filter tasks by their uid. By default, when the uids query parameter is not set, all task uids are returned. It's possible to specify several uids by separating them with the `,` character. #[deserr(default, error = DeserrQueryParamError)] + #[param(required = false, value_type = Option>, example = json!([231, 423, 598, "*"]))] pub uids: OptionStarOrList, + /// Permits to filter tasks using the uid of the task that canceled them. It's possible to specify several task uids by separating them with the `,` character. #[deserr(default, error = DeserrQueryParamError)] + #[param(required = false, value_type = Option>, example = json!([374, "*"]))] pub canceled_by: OptionStarOrList, + /// Permits to filter tasks by their related type. By default, when `types` query parameter is not set, all task types are returned. It's possible to specify several types by separating them with the `,` character. 
#[deserr(default, error = DeserrQueryParamError)] + #[param(required = false, value_type = Option>, example = json!([Kind::DocumentAdditionOrUpdate, "*"]))] pub types: OptionStarOrList, + /// Permits to filter tasks by their status. By default, when `statuses` query parameter is not set, all task statuses are returned. It's possible to specify several statuses by separating them with the `,` character. #[deserr(default, error = DeserrQueryParamError)] + #[param(required = false, value_type = Option>, example = json!([Status::Succeeded, Status::Failed, Status::Canceled, Status::Enqueued, Status::Processing, "*"]))] pub statuses: OptionStarOrList, + /// Permits to filter tasks by their related index. By default, when `indexUids` query parameter is not set, the tasks of all the indexes are returned. It is possible to specify several indexes by separating them with the `,` character. #[deserr(default, error = DeserrQueryParamError)] + #[param(required = false, value_type = Option>, example = json!(["movies", "theater", "*"]))] pub index_uids: OptionStarOrList, + /// Permits to filter tasks based on their enqueuedAt time. Matches tasks enqueued after the given date. Supports RFC 3339 date format. #[deserr(default, error = DeserrQueryParamError, try_from(OptionStarOr) = deserialize_date_after -> InvalidTaskDateError)] + #[param(required = false, value_type = Option, example = json!(["2024-08-08T16:37:09.971Z", "*"]))] pub after_enqueued_at: OptionStarOr, + /// Permits to filter tasks based on their enqueuedAt time. Matches tasks enqueued before the given date. Supports RFC 3339 date format. #[deserr(default, error = DeserrQueryParamError, try_from(OptionStarOr) = deserialize_date_before -> InvalidTaskDateError)] + #[param(required = false, value_type = Option, example = json!(["2024-08-08T16:37:09.971Z", "*"]))] pub before_enqueued_at: OptionStarOr, + /// Permits to filter tasks based on their startedAt time. Matches tasks started after the given date. 
Supports RFC 3339 date format. #[deserr(default, error = DeserrQueryParamError, try_from(OptionStarOr) = deserialize_date_after -> InvalidTaskDateError)] + #[param(required = false, value_type = Option, example = json!(["2024-08-08T16:37:09.971Z", "*"]))] pub after_started_at: OptionStarOr, + /// Permits to filter tasks based on their startedAt time. Matches tasks started before the given date. Supports RFC 3339 date format. #[deserr(default, error = DeserrQueryParamError, try_from(OptionStarOr) = deserialize_date_before -> InvalidTaskDateError)] + #[param(required = false, value_type = Option, example = json!(["2024-08-08T16:37:09.971Z", "*"]))] pub before_started_at: OptionStarOr, + /// Permits to filter tasks based on their finishedAt time. Matches tasks finished after the given date. Supports RFC 3339 date format. #[deserr(default, error = DeserrQueryParamError, try_from(OptionStarOr) = deserialize_date_after -> InvalidTaskDateError)] + #[param(required = false, value_type = Option, example = json!(["2024-08-08T16:37:09.971Z", "*"]))] pub after_finished_at: OptionStarOr, + /// Permits to filter tasks based on their finishedAt time. Matches tasks finished before the given date. Supports RFC 3339 date format. #[deserr(default, error = DeserrQueryParamError, try_from(OptionStarOr) = deserialize_date_before -> InvalidTaskDateError)] + #[param(required = false, value_type = Option, example = json!(["2024-08-08T16:37:09.971Z", "*"]))] pub before_finished_at: OptionStarOr, } @@ -117,33 +160,58 @@ impl TaskDeletionOrCancelationQuery { } } -#[derive(Debug, Deserr)] +#[derive(Debug, Deserr, IntoParams)] #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)] +#[into_params(rename_all = "camelCase", parameter_in = Query)] pub struct TaskDeletionOrCancelationQuery { + /// Permits to filter tasks by their uid. By default, when the `uids` query parameter is not set, all task uids are returned. 
It's possible to specify several uids by separating them with the `,` character. #[deserr(default, error = DeserrQueryParamError)] - pub uids: OptionStarOrList, + #[param(required = false, value_type = Option>, example = json!([231, 423, 598, "*"]))] + pub uids: OptionStarOrList, + /// Lets you filter tasks by their `batchUid`. #[deserr(default, error = DeserrQueryParamError)] + #[param(required = false, value_type = Option>, example = json!([231, 423, 598, "*"]))] pub batch_uids: OptionStarOrList, + /// Permits to filter tasks using the uid of the task that canceled them. It's possible to specify several task uids by separating them with the `,` character. #[deserr(default, error = DeserrQueryParamError)] - pub canceled_by: OptionStarOrList, + #[param(required = false, value_type = Option>, example = json!([374, "*"]))] + pub canceled_by: OptionStarOrList, + /// Permits to filter tasks by their related type. By default, when `types` query parameter is not set, all task types are returned. It's possible to specify several types by separating them with the `,` character. #[deserr(default, error = DeserrQueryParamError)] + #[param(required = false, value_type = Option>, example = json!([Kind::DocumentDeletion, "*"]))] pub types: OptionStarOrList, + /// Permits to filter tasks by their status. By default, when `statuses` query parameter is not set, all task statuses are returned. It's possible to specify several statuses by separating them with the `,` character. #[deserr(default, error = DeserrQueryParamError)] + #[param(required = false, value_type = Option>, example = json!([Status::Succeeded, Status::Failed, Status::Canceled, "*"]))] pub statuses: OptionStarOrList, + /// Permits to filter tasks by their related index. By default, when `indexUids` query parameter is not set, the tasks of all the indexes are returned. It is possible to specify several indexes by separating them with the `,` character. 
#[deserr(default, error = DeserrQueryParamError)] + #[param(required = false, value_type = Option>, example = json!(["movies", "theater", "*"]))] pub index_uids: OptionStarOrList, + /// Permits to filter tasks based on their enqueuedAt time. Matches tasks enqueued after the given date. Supports RFC 3339 date format. #[deserr(default, error = DeserrQueryParamError, try_from(OptionStarOr) = deserialize_date_after -> InvalidTaskDateError)] + #[param(required = false, value_type = Option, example = json!(["2024-08-08T16:37:09.971Z", "*"]))] pub after_enqueued_at: OptionStarOr, + /// Permits to filter tasks based on their enqueuedAt time. Matches tasks enqueued before the given date. Supports RFC 3339 date format. #[deserr(default, error = DeserrQueryParamError, try_from(OptionStarOr) = deserialize_date_before -> InvalidTaskDateError)] + #[param(required = false, value_type = Option, example = json!(["2024-08-08T16:37:09.971Z", "*"]))] pub before_enqueued_at: OptionStarOr, + /// Permits to filter tasks based on their startedAt time. Matches tasks started after the given date. Supports RFC 3339 date format. #[deserr(default, error = DeserrQueryParamError, try_from(OptionStarOr) = deserialize_date_after -> InvalidTaskDateError)] + #[param(required = false, value_type = Option, example = json!(["2024-08-08T16:37:09.971Z", "*"]))] pub after_started_at: OptionStarOr, + /// Permits to filter tasks based on their startedAt time. Matches tasks started before the given date. Supports RFC 3339 date format. #[deserr(default, error = DeserrQueryParamError, try_from(OptionStarOr) = deserialize_date_before -> InvalidTaskDateError)] + #[param(required = false, value_type = Option, example = json!(["2024-08-08T16:37:09.971Z", "*"]))] pub before_started_at: OptionStarOr, + /// Permits to filter tasks based on their finishedAt time. Matches tasks finished after the given date. Supports RFC 3339 date format. 
#[deserr(default, error = DeserrQueryParamError, try_from(OptionStarOr) = deserialize_date_after -> InvalidTaskDateError)] + #[param(required = false, value_type = Option, example = json!(["2024-08-08T16:37:09.971Z", "*"]))] pub after_finished_at: OptionStarOr, + /// Permits to filter tasks based on their finishedAt time. Matches tasks finished before the given date. Supports RFC 3339 date format. #[deserr(default, error = DeserrQueryParamError, try_from(OptionStarOr) = deserialize_date_before -> InvalidTaskDateError)] + #[param(required = false, value_type = Option, example = json!(["2024-08-08T16:37:09.971Z", "*"]))] pub before_finished_at: OptionStarOr, } @@ -226,6 +294,51 @@ impl Aggregate for TaskFilterAnalytics, Data>, params: AwebQueryParameter, @@ -260,11 +373,8 @@ async fn cancel_tasks( let query = params.into_query(); - let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes( - &index_scheduler.read_txn()?, - &query, - index_scheduler.filters(), - )?; + let (tasks, _) = + index_scheduler.get_task_ids_from_authorized_indexes(&query, index_scheduler.filters())?; let task_cancelation = KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks }; @@ -278,6 +388,51 @@ async fn cancel_tasks( Ok(HttpResponse::Ok().json(task)) } +/// Delete tasks +/// +/// Delete [tasks](https://docs.meilisearch.com/learn/advanced/asynchronous_operations.html) on filter +#[utoipa::path( + delete, + path = "", + tag = "Tasks", + security(("Bearer" = ["tasks.delete", "tasks.*", "*"])), + params(TaskDeletionOrCancelationQuery), + responses( + (status = 200, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!( + { + "taskUid": 147, + "indexUid": null, + "status": "enqueued", + "type": "taskDeletion", + "enqueuedAt": "2024-08-08T17:05:55.791772Z" + } + )), + (status = 400, description = "A filter is missing", body = ResponseError, content_type = "application/json", example = 
json!( + { + "message": "Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.", + "code": "missing_task_filters", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_task_filters" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + (status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Task :taskUid not found.", + "code": "task_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors/#task_not_found" + } + )) + ) +)] async fn delete_tasks( index_scheduler: GuardedData, Data>, params: AwebQueryParameter, @@ -312,11 +467,8 @@ async fn delete_tasks( let query = params.into_query(); - let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes( - &index_scheduler.read_txn()?, - &query, - index_scheduler.filters(), - )?; + let (tasks, _) = + index_scheduler.get_task_ids_from_authorized_indexes(&query, index_scheduler.filters())?; let task_deletion = KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks }; @@ -329,15 +481,69 @@ async fn delete_tasks( Ok(HttpResponse::Ok().json(task)) } -#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, ToSchema)] pub struct AllTasks { + /// The list of tasks that matched the filter. 
results: Vec, + /// Total number of browsable results using offset/limit parameters for the given resource. total: u64, + /// Limit given for the query. If limit is not provided as a query parameter, this parameter displays the default limit value. limit: u32, + /// The first task uid returned. from: Option, + /// Represents the value to send in from to fetch the next slice of the results. The first item for the next slice starts at this exact number. When the returned value is null, it means that all the data have been browsed in the given order. next: Option, } +/// Get all tasks +/// +/// Get all [tasks](https://docs.meilisearch.com/learn/advanced/asynchronous_operations.html) +#[utoipa::path( + get, + path = "", + tag = "Tasks", + security(("Bearer" = ["tasks.get", "tasks.*", "*"])), + params(TasksFilterQuery), + responses( + (status = 200, description = "Get all tasks", body = AllTasks, content_type = "application/json", example = json!( + { + "results": [ + { + "uid": 144, + "indexUid": "mieli", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "settings": { + "filterableAttributes": [ + "play_count" + ] + } + }, + "error": null, + "duration": "PT0.009330S", + "enqueuedAt": "2024-08-08T09:01:13.348471Z", + "startedAt": "2024-08-08T09:01:13.349442Z", + "finishedAt": "2024-08-08T09:01:13.358772Z" + } + ], + "total": 1, + "limit": 1, + "from": 144, + "next": null + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] async fn get_tasks( index_scheduler: GuardedData, Data>, params: AwebQueryParameter, @@ -349,7 +555,7 @@ async fn get_tasks( let query = params.into_query(); let filters = index_scheduler.filters(); - let (tasks, total) = index_scheduler.get_tasks_from_authorized_indexes(query, filters)?; + let (tasks, total) = index_scheduler.get_tasks_from_authorized_indexes(&query, filters)?; let mut results: Vec<_> = tasks.iter().map(TaskView::from_task).collect(); // If we were able to fetch the number +1 tasks we asked @@ -362,6 +568,52 @@ async fn get_tasks( Ok(HttpResponse::Ok().json(tasks)) } +/// Get a task +/// +/// Get a [task](https://www.meilisearch.com/docs/learn/async/asynchronous_operations) +#[utoipa::path( + get, + path = "/{taskUid}", + tag = "Tasks", + security(("Bearer" = ["tasks.get", "tasks.*", "*"])), + params(("taskUid", format = UInt32, example = 0, description = "The task identifier", nullable = false)), + responses( + (status = 200, description = "Task successfully retrieved", body = TaskView, content_type = "application/json", example = json!( + { + "uid": 1, + "indexUid": "movies", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 79000, + "indexedDocuments": 79000 + }, + "error": null, + "duration": "PT1S", + "enqueuedAt": "2021-01-01T09:39:00.000000Z", + "startedAt": "2021-01-01T09:39:01.000000Z", + "finishedAt": "2021-01-01T09:39:02.000000Z" + } + )), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + (status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "Task :taskUid not found.", + "code": "task_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors/#task_not_found" + } + )) + ) +)] async fn get_task( index_scheduler: GuardedData, Data>, task_uid: web::Path, @@ -377,7 +629,7 @@ async fn get_task( let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() }; let filters = index_scheduler.filters(); - let (tasks, _) = index_scheduler.get_tasks_from_authorized_indexes(query, filters)?; + let (tasks, _) = index_scheduler.get_tasks_from_authorized_indexes(&query, filters)?; if let Some(task) = tasks.first() { let task_view = TaskView::from_task(task); diff --git a/crates/meilisearch/src/search/federated.rs b/crates/meilisearch/src/search/federated.rs index c1c6bb7d7..dec3927e3 100644 --- a/crates/meilisearch/src/search/federated.rs +++ b/crates/meilisearch/src/search/federated.rs @@ -22,6 +22,7 @@ use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue}; use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget}; use roaring::RoaringBitmap; use serde::Serialize; +use utoipa::ToSchema; use super::ranking_rules::{self, RankingRules}; use super::{ @@ -33,10 +34,11 @@ use crate::routes::indexes::search::search_kind; pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0; -#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr)] +#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct FederationOptions { #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = f64)] pub weight: 
Weight, } @@ -70,8 +72,9 @@ impl std::ops::Deref for Weight { } } -#[derive(Debug, deserr::Deserr)] +#[derive(Debug, deserr::Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[schema(rename_all = "camelCase")] pub struct Federation { #[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError)] pub limit: usize, @@ -83,22 +86,26 @@ pub struct Federation { pub merge_facets: Option, } -#[derive(Copy, Clone, Debug, deserr::Deserr, Default)] +#[derive(Copy, Clone, Debug, deserr::Deserr, Default, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[schema(rename_all = "camelCase")] pub struct MergeFacets { #[deserr(default, error = DeserrJsonError)] pub max_values_per_facet: Option, } -#[derive(Debug, deserr::Deserr)] +#[derive(Debug, deserr::Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[schema(rename_all = "camelCase")] pub struct FederatedSearch { pub queries: Vec, #[deserr(default)] pub federation: Option, } -#[derive(Serialize, Clone)] + +#[derive(Serialize, Clone, ToSchema)] #[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] pub struct FederatedSearchResult { pub hits: Vec, pub processing_time_ms: u128, @@ -109,6 +116,7 @@ pub struct FederatedSearchResult { pub semantic_hit_count: Option, #[serde(skip_serializing_if = "Option::is_none")] + #[schema(value_type = Option>>)] pub facet_distribution: Option>>, #[serde(skip_serializing_if = "Option::is_none")] pub facet_stats: Option>, @@ -355,7 +363,7 @@ struct SearchResultByIndex { facets: Option, } -#[derive(Debug, Clone, Default, Serialize)] +#[derive(Debug, Clone, Default, Serialize, ToSchema)] pub struct FederatedFacets(pub BTreeMap); impl FederatedFacets { diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index b48266b6a..abeae55bd 100644 --- a/crates/meilisearch/src/search/mod.rs +++ 
b/crates/meilisearch/src/search/mod.rs @@ -34,11 +34,15 @@ use serde::Serialize; use serde_json::{json, Value}; #[cfg(test)] mod mod_test; +use utoipa::ToSchema; use crate::error::MeilisearchHttpError; mod federated; -pub use federated::{perform_federated_search, FederatedSearch, Federation, FederationOptions}; +pub use federated::{ + perform_federated_search, FederatedSearch, FederatedSearchResult, Federation, + FederationOptions, MergeFacets, +}; mod ranking_rules; @@ -52,7 +56,7 @@ pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "".to_string(); pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "".to_string(); pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5); -#[derive(Clone, Default, PartialEq, Deserr)] +#[derive(Clone, Default, PartialEq, Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct SearchQuery { #[deserr(default, error = DeserrJsonError)] @@ -62,8 +66,10 @@ pub struct SearchQuery { #[deserr(default, error = DeserrJsonError)] pub hybrid: Option, #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] + #[schema(default = DEFAULT_SEARCH_OFFSET)] pub offset: usize, #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError)] + #[schema(default = DEFAULT_SEARCH_LIMIT)] pub limit: usize, #[deserr(default, error = DeserrJsonError)] pub page: Option, @@ -75,15 +81,16 @@ pub struct SearchQuery { pub retrieve_vectors: bool, #[deserr(default, error = DeserrJsonError)] pub attributes_to_crop: Option>, - #[deserr(default, error = DeserrJsonError, default = DEFAULT_CROP_LENGTH())] + #[deserr(error = DeserrJsonError, default = DEFAULT_CROP_LENGTH())] + #[schema(default = DEFAULT_CROP_LENGTH)] pub crop_length: usize, #[deserr(default, error = DeserrJsonError)] pub attributes_to_highlight: Option>, - #[deserr(default, error = DeserrJsonError, default)] + #[deserr(default, error = DeserrJsonError)] pub show_matches_position: bool, - 
#[deserr(default, error = DeserrJsonError, default)] + #[deserr(default, error = DeserrJsonError)] pub show_ranking_score: bool, - #[deserr(default, error = DeserrJsonError, default)] + #[deserr(default, error = DeserrJsonError)] pub show_ranking_score_details: bool, #[deserr(default, error = DeserrJsonError)] pub filter: Option, @@ -93,26 +100,28 @@ pub struct SearchQuery { pub distinct: Option, #[deserr(default, error = DeserrJsonError)] pub facets: Option>, - #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] + #[deserr(error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] + #[schema(default = DEFAULT_HIGHLIGHT_PRE_TAG)] pub highlight_pre_tag: String, - #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_POST_TAG())] + #[deserr(error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_POST_TAG())] + #[schema(default = DEFAULT_HIGHLIGHT_POST_TAG)] pub highlight_post_tag: String, - #[deserr(default, error = DeserrJsonError, default = DEFAULT_CROP_MARKER())] + #[deserr(error = DeserrJsonError, default = DEFAULT_CROP_MARKER())] + #[schema(default = DEFAULT_CROP_MARKER)] pub crop_marker: String, - #[deserr(default, error = DeserrJsonError, default)] + #[deserr(default, error = DeserrJsonError)] pub matching_strategy: MatchingStrategy, - #[deserr(default, error = DeserrJsonError, default)] + #[deserr(default, error = DeserrJsonError)] pub attributes_to_search_on: Option>, - #[deserr(default, error = DeserrJsonError, default)] + #[deserr(default, error = DeserrJsonError)] pub ranking_score_threshold: Option, - #[deserr(default, error = DeserrJsonError, default)] + #[deserr(default, error = DeserrJsonError)] pub locales: Option>, } -#[derive(Debug, Clone, Copy, PartialEq, Deserr)] +#[derive(Debug, Clone, Copy, PartialEq, Deserr, ToSchema)] #[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)] pub struct RankingScoreThreshold(f64); - impl std::convert::TryFrom for RankingScoreThreshold 
{ type Error = InvalidSearchRankingScoreThreshold; @@ -266,10 +275,11 @@ impl fmt::Debug for SearchQuery { } } -#[derive(Debug, Clone, Default, PartialEq, Deserr)] +#[derive(Debug, Clone, Default, PartialEq, Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct HybridQuery { #[deserr(default, error = DeserrJsonError, default)] + #[schema(value_type = f32, default)] pub semantic_ratio: SemanticRatio, #[deserr(error = DeserrJsonError)] pub embedder: String, @@ -381,8 +391,9 @@ impl SearchQuery { // This struct contains the fields of `SearchQuery` inline. // This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields. // The `From` implementation ensures both structs remain up to date. -#[derive(Debug, Clone, PartialEq, Deserr)] +#[derive(Debug, Clone, PartialEq, Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[schema(rename_all = "camelCase")] pub struct SearchQueryWithIndex { #[deserr(error = DeserrJsonError, missing_field_error = DeserrJsonError::missing_index_uid)] pub index_uid: IndexUid, @@ -530,10 +541,11 @@ impl SearchQueryWithIndex { } } -#[derive(Debug, Clone, PartialEq, Deserr)] +#[derive(Debug, Clone, PartialEq, Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct SimilarQuery { #[deserr(error = DeserrJsonError)] + #[schema(value_type = String)] pub id: ExternalDocumentId, #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] pub offset: usize, @@ -552,6 +564,7 @@ pub struct SimilarQuery { #[deserr(default, error = DeserrJsonError, default)] pub show_ranking_score_details: bool, #[deserr(default, error = DeserrJsonError, default)] + #[schema(value_type = f64)] pub ranking_score_threshold: Option, } @@ -587,7 +600,7 @@ impl TryFrom for ExternalDocumentId { } } -#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)] +#[derive(Debug, Copy, Clone, 
PartialEq, Eq, Deserr, ToSchema)] #[deserr(rename_all = camelCase)] pub enum MatchingStrategy { /// Remove query words from last to first @@ -634,11 +647,13 @@ impl From for OrderBy { } } -#[derive(Debug, Clone, Serialize, PartialEq)] +#[derive(Debug, Clone, Serialize, PartialEq, ToSchema)] pub struct SearchHit { #[serde(flatten)] + #[schema(additional_properties, inline, value_type = HashMap)] pub document: Document, #[serde(rename = "_formatted", skip_serializing_if = "Document::is_empty")] + #[schema(additional_properties, value_type = HashMap)] pub formatted: Document, #[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")] pub matches_position: Option, @@ -648,8 +663,9 @@ pub struct SearchHit { pub ranking_score_details: Option>, } -#[derive(Serialize, Clone, PartialEq)] +#[derive(Serialize, Clone, PartialEq, ToSchema)] #[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] pub struct SearchResult { pub hits: Vec, pub query: String, @@ -657,6 +673,7 @@ pub struct SearchResult { #[serde(flatten)] pub hits_info: HitsInfo, #[serde(skip_serializing_if = "Option::is_none")] + #[schema(value_type = Option>)] pub facet_distribution: Option>>, #[serde(skip_serializing_if = "Option::is_none")] pub facet_stats: Option>, @@ -711,7 +728,7 @@ impl fmt::Debug for SearchResult { } } -#[derive(Serialize, Debug, Clone, PartialEq)] +#[derive(Serialize, Debug, Clone, PartialEq, ToSchema)] #[serde(rename_all = "camelCase")] pub struct SimilarResult { pub hits: Vec, @@ -721,24 +738,27 @@ pub struct SimilarResult { pub hits_info: HitsInfo, } -#[derive(Serialize, Debug, Clone, PartialEq)] +#[derive(Serialize, Debug, Clone, PartialEq, ToSchema)] #[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] pub struct SearchResultWithIndex { pub index_uid: String, #[serde(flatten)] pub result: SearchResult, } -#[derive(Serialize, Debug, Clone, PartialEq, Eq)] +#[derive(Serialize, Debug, Clone, PartialEq, Eq, ToSchema)] 
#[serde(untagged)] pub enum HitsInfo { #[serde(rename_all = "camelCase")] + #[schema(rename_all = "camelCase")] Pagination { hits_per_page: usize, page: usize, total_pages: usize, total_hits: usize }, #[serde(rename_all = "camelCase")] + #[schema(rename_all = "camelCase")] OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize }, } -#[derive(Serialize, Debug, Clone, PartialEq)] +#[derive(Serialize, Debug, Clone, PartialEq, ToSchema)] pub struct FacetStats { pub min: f64, pub max: f64, @@ -1021,8 +1041,9 @@ pub fn perform_search( Ok(result) } -#[derive(Debug, Clone, Default, Serialize)] +#[derive(Debug, Clone, Default, Serialize, ToSchema)] pub struct ComputedFacets { + #[schema(value_type = BTreeMap>)] pub distribution: BTreeMap>, pub stats: BTreeMap, } diff --git a/crates/meilisearch/tests/auth/tenant_token.rs b/crates/meilisearch/tests/auth/tenant_token.rs index 2e3b228d3..a3f89e70b 100644 --- a/crates/meilisearch/tests/auth/tenant_token.rs +++ b/crates/meilisearch/tests/auth/tenant_token.rs @@ -99,12 +99,12 @@ macro_rules! compute_authorized_search { server.use_admin_key("MASTER_KEY").await; let index = server.index("sales"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; - index + let (task1,_status_code) = index.add_documents(documents, None).await; + index.wait_task(task1.uid()).await.succeeded(); + let (task2,_status_code) = index .update_settings(json!({"filterableAttributes": ["color"]})) .await; - index.wait_task(1).await; + index.wait_task(task2.uid()).await.succeeded(); drop(index); for key_content in ACCEPTED_KEYS.iter() { @@ -146,8 +146,8 @@ macro_rules! 
compute_forbidden_search { server.use_admin_key("MASTER_KEY").await; let index = server.index("sales"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); drop(index); for key_content in $parent_keys.iter() { diff --git a/crates/meilisearch/tests/auth/tenant_token_multi_search.rs b/crates/meilisearch/tests/auth/tenant_token_multi_search.rs index e994aa3bc..9059299f3 100644 --- a/crates/meilisearch/tests/auth/tenant_token_multi_search.rs +++ b/crates/meilisearch/tests/auth/tenant_token_multi_search.rs @@ -267,22 +267,22 @@ macro_rules! compute_authorized_single_search { server.use_admin_key("MASTER_KEY").await; let index = server.index("sales"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; - index + let (add_task,_status_code) = index.add_documents(documents, None).await; + index.wait_task(add_task.uid()).await.succeeded(); + let (update_task,_status_code) = index .update_settings(json!({"filterableAttributes": ["color"]})) .await; - index.wait_task(1).await; + index.wait_task(update_task.uid()).await.succeeded(); drop(index); let index = server.index("products"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(2).await; - index + let (add_task2,_status_code) = index.add_documents(documents, None).await; + index.wait_task(add_task2.uid()).await.succeeded(); + let (update_task2,_status_code) = index .update_settings(json!({"filterableAttributes": ["doggos"]})) .await; - index.wait_task(3).await; + index.wait_task(update_task2.uid()).await.succeeded(); drop(index); @@ -338,22 +338,22 @@ macro_rules! 
compute_authorized_multiple_search { server.use_admin_key("MASTER_KEY").await; let index = server.index("sales"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; - index + let (task,_status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task,_status_code) = index .update_settings(json!({"filterableAttributes": ["color"]})) .await; - index.wait_task(1).await; + index.wait_task(task.uid()).await.succeeded(); drop(index); let index = server.index("products"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(2).await; - index + let (task,_status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task,_status_code) = index .update_settings(json!({"filterableAttributes": ["doggos"]})) .await; - index.wait_task(3).await; + index.wait_task(task.uid()).await.succeeded(); drop(index); @@ -422,22 +422,22 @@ macro_rules! 
compute_forbidden_single_search { server.use_admin_key("MASTER_KEY").await; let index = server.index("sales"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; - index + let (task,_status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task,_status_code) = index .update_settings(json!({"filterableAttributes": ["color"]})) .await; - index.wait_task(1).await; + index.wait_task(task.uid()).await.succeeded(); drop(index); let index = server.index("products"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(2).await; - index + let (task,_status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task,_status_code) = index .update_settings(json!({"filterableAttributes": ["doggos"]})) .await; - index.wait_task(3).await; + index.wait_task(task.uid()).await.succeeded(); drop(index); assert_eq!($parent_keys.len(), $failed_query_indexes.len(), "keys != query_indexes"); @@ -498,22 +498,22 @@ macro_rules! 
compute_forbidden_multiple_search { server.use_admin_key("MASTER_KEY").await; let index = server.index("sales"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; - index + let (task,_status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task,_status_code) = index .update_settings(json!({"filterableAttributes": ["color"]})) .await; - index.wait_task(1).await; + index.wait_task(task.uid()).await.succeeded(); drop(index); let index = server.index("products"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(2).await; - index + let (task,_status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task,_status_code) = index .update_settings(json!({"filterableAttributes": ["doggos"]})) .await; - index.wait_task(3).await; + index.wait_task(task.uid()).await.succeeded(); drop(index); assert_eq!($parent_keys.len(), $failed_query_indexes.len(), "keys != query_indexes"); diff --git a/crates/meilisearch/tests/batches/mod.rs b/crates/meilisearch/tests/batches/mod.rs index 49b83360b..70307ac25 100644 --- a/crates/meilisearch/tests/batches/mod.rs +++ b/crates/meilisearch/tests/batches/mod.rs @@ -10,8 +10,8 @@ use crate::json; async fn error_get_unexisting_batch_status() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index.wait_task(0).await; + let (task, _coder) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index.get_batch(1).await; let expected_response = json!({ @@ -29,8 +29,8 @@ async fn error_get_unexisting_batch_status() { async fn get_batch_status() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index.wait_task(0).await; + let (task, _status_code) = index.create(None).await; + 
index.wait_task(task.uid()).await.succeeded(); let (_response, code) = index.get_batch(0).await; assert_eq!(code, 200); } @@ -39,8 +39,8 @@ async fn get_batch_status() { async fn list_batches() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index.wait_task(0).await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); index .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; @@ -97,7 +97,7 @@ async fn list_batches_with_star_filters() { let server = Server::new().await; let index = server.index("test"); let (batch, _code) = index.create(None).await; - index.wait_task(batch.uid()).await; + index.wait_task(batch.uid()).await.succeeded(); index .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; @@ -141,25 +141,21 @@ async fn list_batches_with_star_filters() { async fn list_batches_status_filtered() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index.wait_task(0).await; - index - .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) - .await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.failed(); let (response, code) = index.filtered_batches(&[], &["succeeded"], &[]).await; assert_eq!(code, 200, "{}", response); assert_eq!(response["results"].as_array().unwrap().len(), 1); - // We can't be sure that the update isn't already processed so we can't test this - // let (response, code) = index.filtered_batches(&[], &["processing"]).await; - // assert_eq!(code, 200, "{}", response); - // assert_eq!(response["results"].as_array().unwrap().len(), 1); - - index.wait_task(1).await; - let (response, code) = index.filtered_batches(&[], 
&["succeeded"], &[]).await; assert_eq!(code, 200, "{}", response); + assert_eq!(response["results"].as_array().unwrap().len(), 1); + + let (response, code) = index.filtered_batches(&[], &["succeeded", "failed"], &[]).await; + assert_eq!(code, 200, "{}", response); assert_eq!(response["results"].as_array().unwrap().len(), 2); } @@ -171,23 +167,26 @@ async fn list_batches_type_filtered() { index.wait_task(task.uid()).await.succeeded(); let (task, _) = index.delete().await; index.wait_task(task.uid()).await.succeeded(); - let (response, code) = index.filtered_batches(&["indexCreation"], &[], &[]).await; assert_eq!(code, 200, "{}", response); assert_eq!(response["results"].as_array().unwrap().len(), 1); let (response, code) = - index.filtered_batches(&["indexCreation", "indexDeletion"], &[], &[]).await; + index.filtered_batches(&["indexCreation", "IndexDeletion"], &[], &[]).await; assert_eq!(code, 200, "{}", response); assert_eq!(response["results"].as_array().unwrap().len(), 2); + + let (response, code) = index.filtered_batches(&["indexCreation"], &[], &[]).await; + assert_eq!(code, 200, "{}", response); + assert_eq!(response["results"].as_array().unwrap().len(), 1); } #[actix_rt::test] async fn list_batches_invalid_canceled_by_filter() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index.wait_task(0).await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); index .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; @@ -201,8 +200,8 @@ async fn list_batches_invalid_canceled_by_filter() { async fn list_batches_status_and_type_filtered() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index.wait_task(0).await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); index 
.add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; @@ -275,8 +274,9 @@ async fn list_batch_filter_error() { async fn test_summarized_document_addition_or_update() { let server = Server::new().await; let index = server.index("test"); - index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), None).await; - index.wait_task(0).await; + let (task, _status_code) = + index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), None).await; + index.wait_task(task.uid()).await.succeeded(); let (batch, _) = index.get_batch(0).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -306,8 +306,9 @@ async fn test_summarized_document_addition_or_update() { } "#); - index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await; - index.wait_task(1).await; + let (task, _status_code) = + index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await; + index.wait_task(task.uid()).await.succeeded(); let (batch, _) = index.get_batch(1).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -342,8 +343,8 @@ async fn test_summarized_document_addition_or_update() { async fn test_summarized_delete_documents_by_batch() { let server = Server::new().await; let index = server.index("test"); - index.delete_batch(vec![1, 2, 3]).await; - index.wait_task(0).await; + let (task, _status_code) = index.delete_batch(vec![1, 2, 3]).await; + index.wait_task(task.uid()).await.failed(); let (batch, _) = index.get_batch(0).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -374,8 +375,8 @@ async fn test_summarized_delete_documents_by_batch() { "#); index.create(None).await; - 
index.delete_batch(vec![42]).await; - index.wait_task(2).await; + let (task, _status_code) = index.delete_batch(vec![42]).await; + index.wait_task(task.uid()).await.succeeded(); let (batch, _) = index.get_batch(2).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -411,8 +412,9 @@ async fn test_summarized_delete_documents_by_filter() { let server = Server::new().await; let index = server.index("test"); - index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; - index.wait_task(0).await; + let (task, _status_code) = + index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; + index.wait_task(task.uid()).await.failed(); let (batch, _) = index.get_batch(0).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -444,8 +446,9 @@ async fn test_summarized_delete_documents_by_filter() { "#); index.create(None).await; - index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; - index.wait_task(2).await; + let (task, _status_code) = + index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; + index.wait_task(task.uid()).await.failed(); let (batch, _) = index.get_batch(2).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -477,8 +480,9 @@ async fn test_summarized_delete_documents_by_filter() { "#); index.update_settings(json!({ "filterableAttributes": ["doggo"] })).await; - index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; - index.wait_task(4).await; + let (task, _status_code) = + index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; + index.wait_task(task.uid()).await.succeeded(); let (batch, _) = index.get_batch(4).await; 
assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -514,14 +518,13 @@ async fn test_summarized_delete_documents_by_filter() { async fn test_summarized_delete_document_by_id() { let server = Server::new().await; let index = server.index("test"); - index.delete_document(1).await; - index.wait_task(0).await; + let (task, _status_code) = index.delete_document(1).await; + index.wait_task(task.uid()).await.failed(); let (batch, _) = index.get_batch(0).await; - assert_json_snapshot!(batch, - { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + snapshot!(batch, @r#" { - "uid": 0, + "uid": "[uid]", "progress": null, "details": { "providedIds": 1, @@ -546,8 +549,8 @@ async fn test_summarized_delete_document_by_id() { "#); index.create(None).await; - index.delete_document(42).await; - index.wait_task(2).await; + let (task, _status_code) = index.delete_document(42).await; + index.wait_task(task.uid()).await.succeeded(); let (batch, _) = index.get_batch(2).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -594,8 +597,8 @@ async fn test_summarized_settings_update() { } "###); - index.update_settings(json!({ "displayedAttributes": ["doggos", "name"], "filterableAttributes": ["age", "nb_paw_pads"], "sortableAttributes": ["iq"] })).await; - index.wait_task(0).await; + let (task,_status_code) = index.update_settings(json!({ "displayedAttributes": ["doggos", "name"], "filterableAttributes": ["age", "nb_paw_pads"], "sortableAttributes": ["iq"] })).await; + index.wait_task(task.uid()).await.succeeded(); let (batch, _) = index.get_batch(0).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -639,8 +642,8 @@ async fn 
test_summarized_settings_update() { async fn test_summarized_index_creation() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index.wait_task(0).await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); let (batch, _) = index.get_batch(0).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -667,8 +670,8 @@ async fn test_summarized_index_creation() { } "#); - index.create(Some("doggos")).await; - index.wait_task(1).await; + let (task, _status_code) = index.create(Some("doggos")).await; + index.wait_task(task.uid()).await.failed(); let (batch, _) = index.get_batch(1).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -703,7 +706,7 @@ async fn test_summarized_index_deletion() { let server = Server::new().await; let index = server.index("test"); let (ret, _code) = index.delete().await; - let batch = index.wait_task(ret.uid()).await; + let batch = index.wait_task(ret.uid()).await.failed(); snapshot!(batch, @r###" { @@ -734,7 +737,7 @@ async fn test_summarized_index_deletion() { // both batches may get autobatched and the deleted documents count will be wrong. let (ret, _code) = index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await; - let batch = index.wait_task(ret.uid()).await; + let batch = index.wait_task(ret.uid()).await.succeeded(); snapshot!(batch, @r###" { @@ -757,7 +760,7 @@ async fn test_summarized_index_deletion() { "###); let (ret, _code) = index.delete().await; - let batch = index.wait_task(ret.uid()).await; + let batch = index.wait_task(ret.uid()).await.succeeded(); snapshot!(batch, @r###" { @@ -780,7 +783,7 @@ async fn test_summarized_index_deletion() { // What happens when you delete an index that doesn't exists. 
let (ret, _code) = index.delete().await; - let batch = index.wait_task(ret.uid()).await; + let batch = index.wait_task(ret.uid()).await.failed(); snapshot!(batch, @r###" { @@ -812,8 +815,8 @@ async fn test_summarized_index_update() { let server = Server::new().await; let index = server.index("test"); // If the index doesn't exist yet, we should get errors with or without the primary key. - index.update(None).await; - index.wait_task(0).await; + let (task, _status_code) = index.update(None).await; + index.wait_task(task.uid()).await.failed(); let (batch, _) = index.get_batch(0).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -840,8 +843,8 @@ async fn test_summarized_index_update() { } "#); - index.update(Some("bones")).await; - index.wait_task(1).await; + let (task, _status_code) = index.update(Some("bones")).await; + index.wait_task(task.uid()).await.failed(); let (batch, _) = index.get_batch(1).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -873,8 +876,8 @@ async fn test_summarized_index_update() { // And run the same two tests once the index do exists. 
index.create(None).await; - index.update(None).await; - index.wait_task(3).await; + let (task, _status_code) = index.update(None).await; + index.wait_task(task.uid()).await.succeeded(); let (batch, _) = index.get_batch(3).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -901,8 +904,8 @@ async fn test_summarized_index_update() { } "#); - index.update(Some("bones")).await; - index.wait_task(4).await; + let (task, _status_code) = index.update(Some("bones")).await; + index.wait_task(task.uid()).await.succeeded(); let (batch, _) = index.get_batch(4).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -935,12 +938,12 @@ async fn test_summarized_index_update() { #[actix_web::test] async fn test_summarized_index_swap() { let server = Server::new().await; - server + let (task, _status_code) = server .index_swap(json!([ { "indexes": ["doggos", "cattos"] } ])) .await; - server.wait_task(0).await; + server.wait_task(task.uid()).await.failed(); let (batch, _) = server.get_batch(0).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -975,39 +978,32 @@ async fn test_summarized_index_swap() { "#); server.index("doggos").create(None).await; - server.index("cattos").create(None).await; + let (task, _status_code) = server.index("cattos").create(None).await; server .index_swap(json!([ { "indexes": ["doggos", "cattos"] } ])) .await; - server.wait_task(3).await; - let (batch, _) = server.get_batch(3).await; + server.wait_task(task.uid()).await.succeeded(); + let (batch, _) = server.get_batch(1).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r#" { - "uid": 3, + "uid": 1, "progress": null, 
- "details": { - "swaps": [ - { - "indexes": [ - "doggos", - "cattos" - ] - } - ] - }, + "details": {}, "stats": { "totalNbTasks": 1, "status": { "succeeded": 1 }, "types": { - "indexSwap": 1 + "indexCreation": 1 }, - "indexUids": {} + "indexUids": { + "doggos": 1 + } }, "duration": "[duration]", "startedAt": "[date]", @@ -1021,10 +1017,10 @@ async fn test_summarized_batch_cancelation() { let server = Server::new().await; let index = server.index("doggos"); // to avoid being flaky we're only going to cancel an already finished batch :( - index.create(None).await; - index.wait_task(0).await; - server.cancel_tasks("uids=0").await; - index.wait_task(1).await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = server.cancel_tasks("uids=0").await; + index.wait_task(task.uid()).await.succeeded(); let (batch, _) = index.get_batch(1).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -1059,10 +1055,10 @@ async fn test_summarized_batch_deletion() { let server = Server::new().await; let index = server.index("doggos"); // to avoid being flaky we're only going to delete an already finished batch :( - index.create(None).await; - index.wait_task(0).await; - server.delete_tasks("uids=0").await; - index.wait_task(1).await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = server.delete_tasks("uids=0").await; + index.wait_task(task.uid()).await.succeeded(); let (batch, _) = index.get_batch(1).await; assert_json_snapshot!(batch, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -1095,8 +1091,8 @@ async fn test_summarized_batch_deletion() { #[actix_web::test] async fn test_summarized_dump_creation() { let server = Server::new().await; - 
server.create_dump().await; - server.wait_task(0).await; + let (task, _status_code) = server.create_dump().await; + server.wait_task(task.uid()).await; let (batch, _) = server.get_batch(0).await; assert_json_snapshot!(batch, { ".details.dumpUid" => "[dumpUid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, diff --git a/crates/meilisearch/tests/common/mod.rs b/crates/meilisearch/tests/common/mod.rs index 44385752e..52aa3b32d 100644 --- a/crates/meilisearch/tests/common/mod.rs +++ b/crates/meilisearch/tests/common/mod.rs @@ -46,11 +46,11 @@ impl Value { // Panic if the json doesn't contain the `status` field set to "succeeded" #[track_caller] - pub fn succeeded(&self) -> &Self { + pub fn succeeded(&self) -> Self { if !self.is_success() { panic!("Called succeeded on {}", serde_json::to_string_pretty(&self.0).unwrap()); } - self + self.clone() } /// Return `true` if the `status` field is set to `failed`. @@ -65,11 +65,11 @@ impl Value { // Panic if the json doesn't contain the `status` field set to "succeeded" #[track_caller] - pub fn failed(&self) -> &Self { + pub fn failed(&self) -> Self { if !self.is_fail() { panic!("Called failed on {}", serde_json::to_string_pretty(&self.0).unwrap()); } - self + self.clone() } } @@ -426,7 +426,7 @@ pub async fn shared_index_with_test_set() -> &'static Index<'static, Shared> { ) .await; assert_eq!(code, 202); - index.wait_task(response.uid()).await; + index.wait_task(response.uid()).await.succeeded(); index }) .await diff --git a/crates/meilisearch/tests/documents/add_documents.rs b/crates/meilisearch/tests/documents/add_documents.rs index d72b1a7a8..67dc87ad3 100644 --- a/crates/meilisearch/tests/documents/add_documents.rs +++ b/crates/meilisearch/tests/documents/add_documents.rs @@ -980,7 +980,7 @@ async fn add_documents_no_index_creation() { snapshot!(code, @"202 Accepted"); assert_eq!(response["taskUid"], 0); - index.wait_task(0).await; + 
index.wait_task(response.uid()).await.succeeded(); let (response, code) = index.get_task(0).await; snapshot!(code, @"200 OK"); @@ -1059,9 +1059,9 @@ async fn document_addition_with_primary_key() { } "###); - index.wait_task(0).await; + index.wait_task(response.uid()).await.succeeded(); - let (response, code) = index.get_task(0).await; + let (response, code) = index.get_task(response.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -1169,7 +1169,7 @@ async fn replace_document() { } "###); - index.wait_task(0).await; + index.wait_task(response.uid()).await.succeeded(); let documents = json!([ { @@ -1178,12 +1178,12 @@ async fn replace_document() { } ]); - let (_response, code) = index.add_documents(documents, None).await; + let (task, code) = index.add_documents(documents, None).await; snapshot!(code,@"202 Accepted"); - index.wait_task(1).await; + index.wait_task(task.uid()).await.succeeded(); - let (response, code) = index.get_task(1).await; + let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -1220,9 +1220,89 @@ async fn replace_document() { #[actix_rt::test] async fn add_no_documents() { let server = Server::new().await; - let index = server.index("test"); - let (_response, code) = index.add_documents(json!([]), None).await; + let index = server.index("kefir"); + let (task, code) = index.add_documents(json!([]), None).await; snapshot!(code, @"202 Accepted"); + let task = server.wait_task(task.uid()).await; + let task = task.succeeded(); + snapshot!(task, @r#" + { + "uid": "[uid]", + "batchUid": "[batch_uid]", + "indexUid": "kefir", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { 
+ "receivedDocuments": 0, + "indexedDocuments": 0 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "#); + + let (task, _code) = index.add_documents(json!([]), Some("kefkef")).await; + let task = server.wait_task(task.uid()).await; + let task = task.succeeded(); + snapshot!(task, @r#" + { + "uid": "[uid]", + "batchUid": "[batch_uid]", + "indexUid": "kefir", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 0, + "indexedDocuments": 0 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "#); + + let (task, _code) = index.add_documents(json!([{ "kefkef": 1 }]), None).await; + let task = server.wait_task(task.uid()).await; + let task = task.succeeded(); + snapshot!(task, @r#" + { + "uid": "[uid]", + "batchUid": "[batch_uid]", + "indexUid": "kefir", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 1 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "#); + let (documents, _status) = index.get_all_documents(GetAllDocumentsOptions::default()).await; + snapshot!(documents, @r#" + { + "results": [ + { + "kefkef": 1 + } + ], + "offset": 0, + "limit": 20, + "total": 1 + } + "#); } #[actix_rt::test] @@ -1273,9 +1353,9 @@ async fn error_add_documents_bad_document_id() { "content": "foobar" } ]); - let (value, _code) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; - let (response, code) = index.get_task(value.uid()).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); 
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -1311,7 +1391,7 @@ async fn error_add_documents_bad_document_id() { } ]); let (value, _code) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.failed(); let (response, code) = index.get_task(value.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -1348,7 +1428,7 @@ async fn error_add_documents_bad_document_id() { } ]); let (value, _code) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.failed(); let (response, code) = index.get_task(value.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -1389,9 +1469,9 @@ async fn error_add_documents_missing_document_id() { "content": "foobar" } ]); - index.add_documents(documents, None).await; - index.wait_task(1).await; - let (response, code) = index.get_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -1439,7 +1519,7 @@ async fn error_document_field_limit_reached_in_one_document() { let (response, code) = index.update_documents(documents, Some("id")).await; snapshot!(code, @"202 Accepted"); - let response = index.wait_task(response.uid()).await; + let response = index.wait_task(response.uid()).await.failed(); snapshot!(code, @"202 
Accepted"); // Documents without a primary key are not accepted. snapshot!(response, @@ -1701,8 +1781,8 @@ async fn add_documents_with_geo_field() { }, ]); - index.add_documents(documents, None).await; - let response = index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + let response = index.wait_task(task.uid()).await; snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" { @@ -1740,9 +1820,9 @@ async fn add_documents_invalid_geo_field() { } ]); - index.add_documents(documents, None).await; - index.wait_task(2).await; - let (response, code) = index.get_task(2).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -1778,9 +1858,9 @@ async fn add_documents_invalid_geo_field() { } ]); - index.add_documents(documents, None).await; - index.wait_task(3).await; - let (response, code) = index.get_task(3).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -1816,9 +1896,9 @@ async fn add_documents_invalid_geo_field() { } ]); - index.add_documents(documents, None).await; - index.wait_task(4).await; - let (response, code) = index.get_task(4).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = 
index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -1854,9 +1934,9 @@ async fn add_documents_invalid_geo_field() { } ]); - index.add_documents(documents, None).await; - index.wait_task(5).await; - let (response, code) = index.get_task(5).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -1892,9 +1972,9 @@ async fn add_documents_invalid_geo_field() { } ]); - index.add_documents(documents, None).await; - index.wait_task(6).await; - let (response, code) = index.get_task(6).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -1930,9 +2010,9 @@ async fn add_documents_invalid_geo_field() { } ]); - index.add_documents(documents, None).await; - index.wait_task(7).await; - let (response, code) = index.get_task(7).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -1968,9 +2048,9 @@ async fn add_documents_invalid_geo_field() { } ]); - index.add_documents(documents, 
None).await; - index.wait_task(8).await; - let (response, code) = index.get_task(8).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -2006,9 +2086,9 @@ async fn add_documents_invalid_geo_field() { } ]); - index.add_documents(documents, None).await; - index.wait_task(9).await; - let (response, code) = index.get_task(9).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -2044,9 +2124,9 @@ async fn add_documents_invalid_geo_field() { } ]); - index.add_documents(documents, None).await; - index.wait_task(10).await; - let (response, code) = index.get_task(10).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -2082,9 +2162,9 @@ async fn add_documents_invalid_geo_field() { } ]); - index.add_documents(documents, None).await; - index.wait_task(11).await; - let (response, code) = index.get_task(11).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); 
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -2120,9 +2200,9 @@ async fn add_documents_invalid_geo_field() { } ]); - index.add_documents(documents, None).await; - index.wait_task(12).await; - let (response, code) = index.get_task(12).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -2158,9 +2238,9 @@ async fn add_documents_invalid_geo_field() { } ]); - index.add_documents(documents, None).await; - index.wait_task(13).await; - let (response, code) = index.get_task(13).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = index.get_task(task.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -2200,7 +2280,7 @@ async fn add_documents_invalid_geo_field() { let (response, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - let response = index.wait_task(response.uid()).await; + let response = index.wait_task(response.uid()).await.failed(); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" { @@ -2237,7 +2317,7 @@ async fn add_documents_invalid_geo_field() { let (response, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - let response = index.wait_task(response.uid()).await; + let response = 
index.wait_task(response.uid()).await.failed(); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" { @@ -2274,7 +2354,7 @@ async fn add_documents_invalid_geo_field() { let (response, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - let response = index.wait_task(response.uid()).await; + let response = index.wait_task(response.uid()).await.failed(); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" { @@ -2318,7 +2398,7 @@ async fn add_invalid_geo_and_then_settings() { ]); let (ret, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - let ret = index.wait_task(ret.uid()).await; + let ret = index.wait_task(ret.uid()).await.succeeded(); snapshot!(ret, @r###" { "uid": "[uid]", @@ -2341,7 +2421,7 @@ async fn add_invalid_geo_and_then_settings() { let (ret, code) = index.update_settings(json!({ "sortableAttributes": ["_geo"] })).await; snapshot!(code, @"202 Accepted"); - let ret = index.wait_task(ret.uid()).await; + let ret = index.wait_task(ret.uid()).await.failed(); snapshot!(ret, @r###" { "uid": "[uid]", @@ -2408,9 +2488,9 @@ async fn error_primary_key_inference() { } ]); - index.add_documents(documents, None).await; - index.wait_task(0).await; - let (response, code) = index.get_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = index.get_task(task.uid()).await; assert_eq!(code, 200); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -2449,9 +2529,9 @@ async fn error_primary_key_inference() { } ]); - index.add_documents(documents, None).await; - index.wait_task(1).await; - let (response, 
code) = index.get_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.failed(); + let (response, code) = index.get_task(task.uid()).await; assert_eq!(code, 200); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -2488,9 +2568,9 @@ async fn error_primary_key_inference() { } ]); - index.add_documents(documents, None).await; - index.wait_task(2).await; - let (response, code) = index.get_task(2).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); + let (response, code) = index.get_task(task.uid()).await; assert_eq!(code, 200); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @@ -2527,14 +2607,14 @@ async fn add_documents_with_primary_key_twice() { } ]); - index.add_documents(documents.clone(), Some("title")).await; - index.wait_task(0).await; - let (response, _code) = index.get_task(0).await; + let (task, _status_code) = index.add_documents(documents.clone(), Some("title")).await; + index.wait_task(task.uid()).await.succeeded(); + let (response, _code) = index.get_task(task.uid()).await; assert_eq!(response["status"], "succeeded"); - index.add_documents(documents, Some("title")).await; - index.wait_task(1).await; - let (response, _code) = index.get_task(1).await; + let (task, _status_code) = index.add_documents(documents, Some("title")).await; + index.wait_task(task.uid()).await.succeeded(); + let (response, _code) = index.get_task(task.uid()).await; assert_eq!(response["status"], "succeeded"); } diff --git a/crates/meilisearch/tests/documents/delete_documents.rs b/crates/meilisearch/tests/documents/delete_documents.rs index c50823183..918343f94 100644 --- a/crates/meilisearch/tests/documents/delete_documents.rs +++ 
b/crates/meilisearch/tests/documents/delete_documents.rs @@ -7,10 +7,10 @@ use crate::json; async fn delete_one_document_unexisting_index() { let server = Server::new().await; let index = server.index("test"); - let (_response, code) = index.delete_document(0).await; + let (task, code) = index.delete_document(0).await; assert_eq!(code, 202); - let response = index.wait_task(0).await; + let response = index.wait_task(task.uid()).await; assert_eq!(response["status"], "failed"); } @@ -22,7 +22,7 @@ async fn delete_one_unexisting_document() { index.create(None).await; let (response, code) = index.delete_document(0).await; assert_eq!(code, 202, "{}", response); - let update = index.wait_task(0).await; + let update = index.wait_task(response.uid()).await; assert_eq!(update["status"], "succeeded"); } @@ -30,11 +30,12 @@ async fn delete_one_unexisting_document() { async fn delete_one_document() { let server = Server::new().await; let index = server.index("test"); - index.add_documents(json!([{ "id": 0, "content": "foobar" }]), None).await; - index.wait_task(0).await; - let (_response, code) = server.index("test").delete_document(0).await; - assert_eq!(code, 202); - index.wait_task(1).await; + let (task, _status_code) = + index.add_documents(json!([{ "id": 0, "content": "foobar" }]), None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task, status_code) = server.index("test").delete_document(0).await; + assert_eq!(status_code, 202); + index.wait_task(task.uid()).await.succeeded(); let (_response, code) = index.get_document(0, None).await; assert_eq!(code, 404); @@ -44,10 +45,10 @@ async fn delete_one_document() { async fn clear_all_documents_unexisting_index() { let server = Server::new().await; let index = server.index("test"); - let (_response, code) = index.clear_all_documents().await; + let (task, code) = index.clear_all_documents().await; assert_eq!(code, 202); - let response = index.wait_task(0).await; + let response = 
index.wait_task(task.uid()).await; assert_eq!(response["status"], "failed"); } @@ -56,17 +57,17 @@ async fn clear_all_documents_unexisting_index() { async fn clear_all_documents() { let server = Server::new().await; let index = server.index("test"); - index + let (task, _status_code) = index .add_documents( json!([{ "id": 1, "content": "foobar" }, { "id": 0, "content": "foobar" }]), None, ) .await; - index.wait_task(0).await; - let (_response, code) = index.clear_all_documents().await; + index.wait_task(task.uid()).await.succeeded(); + let (task, code) = index.clear_all_documents().await; assert_eq!(code, 202); - let _update = index.wait_task(1).await; + let _update = index.wait_task(task.uid()).await; let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert!(response["results"].as_array().unwrap().is_empty()); @@ -76,12 +77,12 @@ async fn clear_all_documents() { async fn clear_all_documents_empty_index() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - - let (_response, code) = index.clear_all_documents().await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task, code) = index.clear_all_documents().await; assert_eq!(code, 202); - let _update = index.wait_task(0).await; + let _update = index.wait_task(task.uid()).await; let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert!(response["results"].as_array().unwrap().is_empty()); @@ -91,7 +92,7 @@ async fn clear_all_documents_empty_index() { async fn error_delete_batch_unexisting_index() { let server = Server::new().await; let index = server.index("test"); - let (_, code) = index.delete_batch(vec![]).await; + let (task, code) = index.delete_batch(vec![]).await; let expected_response = json!({ "message": "Index `test` not found.", "code": "index_not_found", @@ -100,7 +101,7 @@ 
async fn error_delete_batch_unexisting_index() { }); assert_eq!(code, 202); - let response = index.wait_task(0).await; + let response = index.wait_task(task.uid()).await; assert_eq!(response["status"], "failed"); assert_eq!(response["error"], expected_response); @@ -110,12 +111,12 @@ async fn error_delete_batch_unexisting_index() { async fn delete_batch() { let server = Server::new().await; let index = server.index("test"); - index.add_documents(json!([{ "id": 1, "content": "foobar" }, { "id": 0, "content": "foobar" }, { "id": 3, "content": "foobar" }]), Some("id")).await; - index.wait_task(0).await; - let (_response, code) = index.delete_batch(vec![1, 0]).await; + let (task,_status_code) = index.add_documents(json!([{ "id": 1, "content": "foobar" }, { "id": 0, "content": "foobar" }, { "id": 3, "content": "foobar" }]), Some("id")).await; + index.wait_task(task.uid()).await.succeeded(); + let (task, code) = index.delete_batch(vec![1, 0]).await; assert_eq!(code, 202); - let _update = index.wait_task(1).await; + let _update = index.wait_task(task.uid()).await; let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 1); @@ -126,12 +127,12 @@ async fn delete_batch() { async fn delete_no_document_batch() { let server = Server::new().await; let index = server.index("test"); - index.add_documents(json!([{ "id": 1, "content": "foobar" }, { "id": 0, "content": "foobar" }, { "id": 3, "content": "foobar" }]), Some("id")).await; - index.wait_task(0).await; + let (task,_status_code) = index.add_documents(json!([{ "id": 1, "content": "foobar" }, { "id": 0, "content": "foobar" }, { "id": 3, "content": "foobar" }]), Some("id")).await; + index.wait_task(task.uid()).await.succeeded(); let (_response, code) = index.delete_batch(vec![]).await; assert_eq!(code, 202, "{}", _response); - let _update = index.wait_task(1).await; + let _update = 
index.wait_task(_response.uid()).await; let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 3); @@ -142,7 +143,7 @@ async fn delete_document_by_filter() { let server = Server::new().await; let index = server.index("doggo"); index.update_settings_filterable_attributes(json!(["color"])).await; - index + let (task, _status_code) = index .add_documents( json!([ { "id": 0, "color": "red" }, @@ -153,7 +154,7 @@ async fn delete_document_by_filter() { Some("id"), ) .await; - index.wait_task(1).await; + index.wait_task(task.uid()).await.succeeded(); let (stats, _) = index.stats().await; snapshot!(json_string!(stats), @r###" @@ -180,7 +181,7 @@ async fn delete_document_by_filter() { } "###); - let response = index.wait_task(2).await; + let response = index.wait_task(response.uid()).await; snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" { "uid": 2, @@ -246,7 +247,7 @@ async fn delete_document_by_filter() { } "###); - let response = index.wait_task(3).await; + let response = index.wait_task(response.uid()).await; snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" { "uid": 3, @@ -302,7 +303,7 @@ async fn delete_document_by_complex_filter() { let server = Server::new().await; let index = server.index("doggo"); index.update_settings_filterable_attributes(json!(["color"])).await; - index + let (task, _status_code) = index .add_documents( json!([ { "id": 0, "color": "red" }, @@ -314,7 +315,7 @@ async fn delete_document_by_complex_filter() { Some("id"), ) .await; - index.wait_task(1).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index .delete_document_by_filter( json!({ "filter": ["color != red", "color != green", "color EXISTS"] 
}), @@ -331,7 +332,7 @@ async fn delete_document_by_complex_filter() { } "###); - let response = index.wait_task(2).await; + let response = index.wait_task(response.uid()).await; snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" { "uid": 2, @@ -390,7 +391,7 @@ async fn delete_document_by_complex_filter() { } "###); - let response = index.wait_task(3).await; + let response = index.wait_task(response.uid()).await; snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" { "uid": 3, diff --git a/crates/meilisearch/tests/index/create_index.rs b/crates/meilisearch/tests/index/create_index.rs index 9b9fbd039..e8efd14e2 100644 --- a/crates/meilisearch/tests/index/create_index.rs +++ b/crates/meilisearch/tests/index/create_index.rs @@ -115,7 +115,7 @@ async fn create_index_with_primary_key() { assert_eq!(response["status"], "enqueued"); - let response = index.wait_task(response.uid()).await; + let response = index.wait_task(response.uid()).await.succeeded(); assert_eq!(response["status"], "succeeded"); assert_eq!(response["type"], "indexCreation"); @@ -130,8 +130,7 @@ async fn create_index_with_invalid_primary_key() { let index = server.unique_index(); let (response, code) = index.add_documents(documents, Some("title")).await; assert_eq!(code, 202); - - index.wait_task(response.uid()).await; + index.wait_task(response.uid()).await.failed(); let (response, code) = index.get().await; assert_eq!(code, 200); @@ -141,8 +140,7 @@ async fn create_index_with_invalid_primary_key() { let (response, code) = index.add_documents(documents, Some("id")).await; assert_eq!(code, 202); - - index.wait_task(response.uid()).await; + index.wait_task(response.uid()).await.failed(); let (response, code) = index.get().await; assert_eq!(code, 200); diff --git 
a/crates/meilisearch/tests/index/get_index.rs b/crates/meilisearch/tests/index/get_index.rs index fcd0a18fa..baa379243 100644 --- a/crates/meilisearch/tests/index/get_index.rs +++ b/crates/meilisearch/tests/index/get_index.rs @@ -12,7 +12,7 @@ async fn create_and_get_index() { assert_eq!(code, 202); - index.wait_task(response.uid()).await; + index.wait_task(response.uid()).await.succeeded(); let (response, code) = index.get().await; @@ -79,8 +79,8 @@ async fn get_and_paginate_indexes() { let server = Server::new().await; const NB_INDEXES: usize = 50; for i in 0..NB_INDEXES { - server.index(&format!("test_{i:02}")).create(None).await; - server.index(&format!("test_{i:02}")).wait_task(i as u64).await; + server.index(format!("test_{i:02}")).create(None).await; + server.index(format!("test_{i:02}")).wait_task(i as u64).await; } // basic diff --git a/crates/meilisearch/tests/index/stats.rs b/crates/meilisearch/tests/index/stats.rs index d0b0a56b9..291cb0ce0 100644 --- a/crates/meilisearch/tests/index/stats.rs +++ b/crates/meilisearch/tests/index/stats.rs @@ -5,11 +5,11 @@ use crate::json; async fn stats() { let server = Server::new().await; let index = server.index("test"); - let (_, code) = index.create(Some("id")).await; + let (task, code) = index.create(Some("id")).await; assert_eq!(code, 202); - index.wait_task(0).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index.stats().await; @@ -33,7 +33,7 @@ async fn stats() { assert_eq!(code, 202); assert_eq!(response["taskUid"], 1); - index.wait_task(1).await; + index.wait_task(response.uid()).await.succeeded(); let (response, code) = index.stats().await; diff --git a/crates/meilisearch/tests/index/update_index.rs b/crates/meilisearch/tests/index/update_index.rs index f991c3580..a9b02e7d4 100644 --- a/crates/meilisearch/tests/index/update_index.rs +++ b/crates/meilisearch/tests/index/update_index.rs @@ -13,9 +13,9 @@ async fn update_primary_key() { assert_eq!(code, 202); - 
index.update(Some("primary")).await; + let (task, _status_code) = index.update(Some("primary")).await; - let response = index.wait_task(1).await; + let response = index.wait_task(task.uid()).await; assert_eq!(response["status"], "succeeded"); @@ -46,9 +46,9 @@ async fn create_and_update_with_different_encoding() { assert_eq!(code, 202); let index = server.index_with_encoder("test", Encoder::Brotli); - index.update(Some("primary")).await; + let (task, _status_code) = index.update(Some("primary")).await; - let response = index.wait_task(1).await; + let response = index.wait_task(task.uid()).await; assert_eq!(response["status"], "succeeded"); } @@ -57,17 +57,17 @@ async fn create_and_update_with_different_encoding() { async fn update_nothing() { let server = Server::new().await; let index = server.index("test"); - let (_, code) = index.create(None).await; + let (task1, code) = index.create(None).await; assert_eq!(code, 202); - index.wait_task(0).await; + index.wait_task(task1.uid()).await.succeeded(); - let (_, code) = index.update(None).await; + let (task2, code) = index.update(None).await; assert_eq!(code, 202); - let response = index.wait_task(1).await; + let response = index.wait_task(task2.uid()).await; assert_eq!(response["status"], "succeeded"); } @@ -88,11 +88,11 @@ async fn error_update_existing_primary_key() { ]); index.add_documents(documents, None).await; - let (_, code) = index.update(Some("primary")).await; + let (task, code) = index.update(Some("primary")).await; assert_eq!(code, 202); - let response = index.wait_task(2).await; + let response = index.wait_task(task.uid()).await; let expected_response = json!({ "message": "Index `test`: Index already has a primary key: `id`.", @@ -107,11 +107,11 @@ async fn error_update_existing_primary_key() { #[actix_rt::test] async fn error_update_unexisting_index() { let server = Server::new().await; - let (_, code) = server.index("test").update(None).await; + let (task, code) = 
server.index("test").update(None).await; assert_eq!(code, 202); - let response = server.index("test").wait_task(0).await; + let response = server.index("test").wait_task(task.uid()).await; let expected_response = json!({ "message": "Index `test` not found.", diff --git a/crates/meilisearch/tests/logs/mod.rs b/crates/meilisearch/tests/logs/mod.rs index 26482b561..e4dc50a9c 100644 --- a/crates/meilisearch/tests/logs/mod.rs +++ b/crates/meilisearch/tests/logs/mod.rs @@ -94,7 +94,7 @@ async fn basic_test_log_stream_route() { "enqueuedAt": "[date]" } "###); - server.wait_task(ret.uid()).await; + server.wait_task(ret.uid()).await.succeeded(); let req = actix_web::test::TestRequest::delete().uri("/logs/stream"); let req = req.to_request(); diff --git a/crates/meilisearch/tests/search/distinct.rs b/crates/meilisearch/tests/search/distinct.rs index 2023c01a8..094ef7bbf 100644 --- a/crates/meilisearch/tests/search/distinct.rs +++ b/crates/meilisearch/tests/search/distinct.rs @@ -151,8 +151,8 @@ async fn distinct_search_with_offset_no_ranking() { let documents = DOCUMENTS.clone(); index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await; - index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await; - index.wait_task(1).await; + let (task, _status_code) = index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await; + index.wait_task(task.uid()).await.succeeded(); fn get_hits(response: &Value) -> Vec<&str> { let hits_array = response["hits"].as_array().unwrap(); @@ -210,8 +210,8 @@ async fn distinct_search_with_pagination_no_ranking() { let documents = DOCUMENTS.clone(); index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await; - index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await; - index.wait_task(1).await; + let (task, _status_code) = index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await; + index.wait_task(task.uid()).await.succeeded(); fn get_hits(response: &Value) -> Vec<&str> { let hits_array = 
response["hits"].as_array().unwrap(); diff --git a/crates/meilisearch/tests/search/errors.rs b/crates/meilisearch/tests/search/errors.rs index ab50e2aa1..9dea42b12 100644 --- a/crates/meilisearch/tests/search/errors.rs +++ b/crates/meilisearch/tests/search/errors.rs @@ -640,7 +640,7 @@ async fn filter_invalid_syntax_object() { let index = server.unique_index(); let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); index .search(json!({"filter": "title & Glass"}), |response, code| { @@ -663,7 +663,7 @@ async fn filter_invalid_syntax_array() { let index = server.unique_index(); let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); index .search(json!({"filter": ["title & Glass"]}), |response, code| { @@ -686,7 +686,7 @@ async fn filter_invalid_syntax_string() { let index = server.unique_index(); let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); let expected_response = json!({ "message": "Found unexpected characters at the end of the filter: `XOR title = Glass`. You probably forgot an `OR` or an `AND` rule.\n15:32 title = Glass XOR title = Glass", @@ -708,7 +708,7 @@ async fn filter_invalid_attribute_array() { let index = server.unique_index(); let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); let expected_response = json!({ "message": format!("Index `{}`: Attribute `many` is not filterable. 
Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid), @@ -730,7 +730,7 @@ async fn filter_invalid_attribute_string() { let index = server.unique_index(); let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); let expected_response = json!({ "message": format!("Index `{}`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid), @@ -752,7 +752,7 @@ async fn filter_reserved_geo_attribute_array() { let index = server.unique_index(); let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); let expected_response = json!({ "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass", @@ -774,7 +774,7 @@ async fn filter_reserved_geo_attribute_string() { let index = server.unique_index(); let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); let expected_response = json!({ "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. 
Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass", @@ -796,7 +796,7 @@ async fn filter_reserved_attribute_array() { let index = server.unique_index(); let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); let expected_response = json!({ "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass", @@ -818,7 +818,7 @@ async fn filter_reserved_attribute_string() { let index = server.unique_index(); let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); let expected_response = json!({ "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass", @@ -840,7 +840,7 @@ async fn filter_reserved_geo_point_array() { let index = server.unique_index(); let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); let expected_response = json!({ "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. 
Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass", @@ -862,7 +862,7 @@ async fn filter_reserved_geo_point_string() { let index = server.unique_index(); let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); let expected_response = json!({ "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass", @@ -884,7 +884,7 @@ async fn sort_geo_reserved_attribute() { let index = server.unique_index(); let (task, _code) = index.update_settings(json!({"sortableAttributes": ["id"]})).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); let expected_response = json!({ "message": "`_geo` is a reserved keyword and thus can't be used as a sort expression. 
Use the _geoPoint(latitude, longitude) built-in rule to sort on _geo field coordinates.", @@ -911,7 +911,7 @@ async fn sort_reserved_attribute() { let index = server.unique_index(); let (task, _code) = index.update_settings(json!({"sortableAttributes": ["id"]})).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); let expected_response = json!({ "message": "`_geoDistance` is a reserved keyword and thus can't be used as a sort expression.", @@ -1095,7 +1095,7 @@ async fn distinct_at_search_time() { assert_eq!(code, 400); let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); let expected_response = json!({ "message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.", index.uid), @@ -1109,7 +1109,7 @@ async fn distinct_at_search_time() { assert_eq!(code, 400); let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); let expected_response = json!({ "message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. 
Available filterable attributes are: `color, <..hidden-attributes>`.", index.uid), diff --git a/crates/meilisearch/tests/search/facet_search.rs b/crates/meilisearch/tests/search/facet_search.rs index 696c23f91..7e46c5d15 100644 --- a/crates/meilisearch/tests/search/facet_search.rs +++ b/crates/meilisearch/tests/search/facet_search.rs @@ -41,8 +41,8 @@ async fn simple_facet_search() { let documents = DOCUMENTS.clone(); index.update_settings_filterable_attributes(json!(["genres"])).await; - let (response, _code) = index.add_documents(documents, None).await; - index.wait_task(response.uid()).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; @@ -175,8 +175,8 @@ async fn advanced_facet_search() { let documents = DOCUMENTS.clone(); index.update_settings_filterable_attributes(json!(["genres"])).await; index.update_settings_typo_tolerance(json!({ "enabled": false })).await; - let (response, _code) = index.add_documents(documents, None).await; - index.wait_task(response.uid()).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "adventre"})).await; @@ -199,8 +199,8 @@ async fn more_advanced_facet_search() { let documents = DOCUMENTS.clone(); index.update_settings_filterable_attributes(json!(["genres"])).await; index.update_settings_typo_tolerance(json!({ "disableOnWords": ["adventre"] })).await; - let (response, _code) = index.add_documents(documents, None).await; - index.wait_task(response.uid()).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "adventre"})).await; @@ -223,8 
+223,8 @@ async fn simple_facet_search_with_max_values() { let documents = DOCUMENTS.clone(); index.update_settings_faceting(json!({ "maxValuesPerFacet": 1 })).await; index.update_settings_filterable_attributes(json!(["genres"])).await; - let (response, _code) = index.add_documents(documents, None).await; - index.wait_task(response.uid()).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; @@ -245,8 +245,8 @@ async fn simple_facet_search_by_count_with_max_values() { ) .await; index.update_settings_filterable_attributes(json!(["genres"])).await; - let (response, _code) = index.add_documents(documents, None).await; - index.wait_task(response.uid()).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; @@ -261,8 +261,8 @@ async fn non_filterable_facet_search_error() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - let (response, _code) = index.add_documents(documents, None).await; - index.wait_task(response.uid()).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; @@ -280,8 +280,8 @@ async fn facet_search_dont_support_words() { let documents = DOCUMENTS.clone(); index.update_settings_filterable_attributes(json!(["genres"])).await; - let (response, _code) = index.add_documents(documents, None).await; - index.wait_task(response.uid()).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index.facet_search(json!({"facetName": 
"genres", "facetQuery": "words"})).await; @@ -298,8 +298,8 @@ async fn simple_facet_search_with_sort_by_count() { let documents = DOCUMENTS.clone(); index.update_settings_faceting(json!({ "sortFacetValuesBy": { "*": "count" } })).await; index.update_settings_filterable_attributes(json!(["genres"])).await; - let (response, _code) = index.add_documents(documents, None).await; - index.wait_task(response.uid()).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; diff --git a/crates/meilisearch/tests/search/formatted.rs b/crates/meilisearch/tests/search/formatted.rs index 5ded39976..38935da5f 100644 --- a/crates/meilisearch/tests/search/formatted.rs +++ b/crates/meilisearch/tests/search/formatted.rs @@ -65,7 +65,7 @@ async fn formatted_contain_wildcard() { let documents = NESTED_DOCUMENTS.clone(); let (response, _) = index.add_documents(documents, None).await; - index.wait_task(response.uid()).await; + index.wait_task(response.uid()).await.succeeded(); index.search(json!({ "q": "pésti", "attributesToRetrieve": ["father", "mother"], "attributesToHighlight": ["father", "mother", "*"], "attributesToCrop": ["doggos"], "showMatchesPosition": true }), |response, code| @@ -398,7 +398,7 @@ async fn displayedattr_2_smol() { let documents = NESTED_DOCUMENTS.clone(); let (response, _) = index.add_documents(documents, None).await; - index.wait_task(response.uid()).await; + index.wait_task(response.uid()).await.succeeded(); index .search(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] }), @@ -596,7 +596,7 @@ async fn test_cjk_highlight() { { "id": 1, "title": "大卫到了扫罗那里" }, ]); let (response, _) = index.add_documents(documents, None).await; - index.wait_task(response.uid()).await; + index.wait_task(response.uid()).await.succeeded(); index 
.search(json!({"q": "で", "attributesToHighlight": ["title"]}), |response, code| { diff --git a/crates/meilisearch/tests/search/geo.rs b/crates/meilisearch/tests/search/geo.rs index e92056191..b0cc8b6ca 100644 --- a/crates/meilisearch/tests/search/geo.rs +++ b/crates/meilisearch/tests/search/geo.rs @@ -46,8 +46,8 @@ async fn geo_sort_with_geo_strings() { let documents = DOCUMENTS.clone(); index.update_settings_filterable_attributes(json!(["_geo"])).await; index.update_settings_sortable_attributes(json!(["_geo"])).await; - index.add_documents(documents, None).await; - index.wait_task(2).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); index .search( @@ -128,7 +128,7 @@ async fn bug_4640() { index.add_documents(documents, None).await; index.update_settings_filterable_attributes(json!(["_geo"])).await; let (ret, _code) = index.update_settings_sortable_attributes(json!(["_geo"])).await; - index.wait_task(ret.uid()).await; + index.wait_task(ret.uid()).await.succeeded(); // Sort the document with the second one first index diff --git a/crates/meilisearch/tests/search/hybrid.rs b/crates/meilisearch/tests/search/hybrid.rs index 00a65d9aa..5c1a3bbff 100644 --- a/crates/meilisearch/tests/search/hybrid.rs +++ b/crates/meilisearch/tests/search/hybrid.rs @@ -30,11 +30,11 @@ async fn index_with_documents_user_provided<'a>( "dimensions": 2}}} )) .await; assert_eq!(202, code, "{:?}", response); - index.wait_task(response.uid()).await; + index.wait_task(response.uid()).await.succeeded(); let (response, code) = index.add_documents(documents.clone(), None).await; assert_eq!(202, code, "{:?}", response); - index.wait_task(response.uid()).await; + index.wait_task(response.uid()).await.succeeded(); index } @@ -63,11 +63,11 @@ async fn index_with_documents_hf<'a>(server: &'a Server, documents: &Value) -> I }}} )) .await; assert_eq!(202, code, "{:?}", response); - index.wait_task(response.uid()).await; + 
index.wait_task(response.uid()).await.succeeded(); let (response, code) = index.add_documents(documents.clone(), None).await; assert_eq!(202, code, "{:?}", response); - index.wait_task(response.uid()).await; + index.wait_task(response.uid()).await.succeeded(); index } @@ -260,7 +260,7 @@ async fn distribution_shift() { snapshot!(code, @"202 Accepted"); let response = server.wait_task(response.uid()).await; - snapshot!(response["details"], @r###"{"embedders":{"default":{"distribution":{"mean":0.998,"sigma":0.01}}}}"###); + snapshot!(response["details"], @r#"{"embedders":{"default":{"distribution":{"mean":0.998,"sigma":0.01}}}}"#); let (response, code) = index.search_post(search).await; snapshot!(code, @"200 OK"); @@ -573,7 +573,7 @@ async fn retrieve_vectors() { .update_settings(json!({ "displayedAttributes": ["id", "title", "desc", "_vectors"]} )) .await; assert_eq!(202, code, "{:?}", response); - index.wait_task(response.uid()).await; + index.wait_task(response.uid()).await.succeeded(); let (response, code) = index .search_post( @@ -623,7 +623,7 @@ async fn retrieve_vectors() { let (response, code) = index.update_settings(json!({ "displayedAttributes": ["id", "title", "desc"]} )).await; assert_eq!(202, code, "{:?}", response); - index.wait_task(response.uid()).await; + index.wait_task(response.uid()).await.succeeded(); let (response, code) = index .search_post( diff --git a/crates/meilisearch/tests/search/locales.rs b/crates/meilisearch/tests/search/locales.rs index c01d854e2..282589d6a 100644 --- a/crates/meilisearch/tests/search/locales.rs +++ b/crates/meilisearch/tests/search/locales.rs @@ -98,8 +98,8 @@ async fn simple_search() { json!({"searchableAttributes": ["name_en", "name_ja", "name_zh", "author_en", "author_ja", "author_zh", "description_en", "description_ja", "description_zh"]}), ) .await; - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + 
index.wait_task(task.uid()).await.succeeded(); // english index @@ -220,8 +220,8 @@ async fn force_locales() { "enqueuedAt": "[date]" } "###); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); // chinese detection index @@ -298,8 +298,8 @@ async fn force_locales_with_pattern() { "enqueuedAt": "[date]" } "###); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); // chinese detection index @@ -374,8 +374,8 @@ async fn force_locales_with_pattern_nested() { "enqueuedAt": "[date]" } "###); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); // chinese index @@ -449,8 +449,8 @@ async fn force_different_locales_with_pattern() { "enqueuedAt": "[date]" } "###); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); // force chinese index @@ -527,8 +527,8 @@ async fn auto_infer_locales_at_search_with_attributes_to_search_on() { "enqueuedAt": "[date]" } "###); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); // auto infer any language index @@ -601,8 +601,8 @@ async fn auto_infer_locales_at_search() { "enqueuedAt": "[date]" } "###); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); index .search( @@ -700,8 +700,8 @@ async fn 
force_different_locales_with_pattern_nested() { "enqueuedAt": "[date]" } "###); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); // chinese index @@ -778,8 +778,8 @@ async fn settings_change() { let index = server.index("test"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, _) = index .update_settings(json!({ "searchableAttributes": ["document_en", "document_ja", "document_zh"], @@ -798,7 +798,7 @@ async fn settings_change() { "enqueuedAt": "[date]" } "###); - index.wait_task(1).await; + index.wait_task(response.uid()).await.succeeded(); // chinese index @@ -861,7 +861,7 @@ async fn settings_change() { "enqueuedAt": "[date]" } "###); - index.wait_task(2).await; + index.wait_task(response.uid()).await.succeeded(); // chinese index @@ -915,8 +915,8 @@ async fn invalid_locales() { json!({"searchableAttributes": ["name_en", "name_ja", "name_zh", "author_en", "author_ja", "author_zh", "description_en", "description_ja", "description_zh"]}), ) .await; - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index.search_post(json!({"q": "Atta", "locales": ["invalid"]})).await; snapshot!(code, @"400 Bad Request"); @@ -1033,8 +1033,8 @@ async fn simple_facet_search() { "enqueuedAt": "[date]" } "###); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, _) = index .facet_search(json!({"facetName": "name_zh", 
"facetQuery": "進撃", "locales": ["cmn"]})) @@ -1095,8 +1095,8 @@ async fn facet_search_with_localized_attributes() { "enqueuedAt": "[date]" } "###); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, _) = index .facet_search(json!({"facetName": "name_zh", "facetQuery": "进击", "locales": ["cmn"]})) @@ -1165,7 +1165,7 @@ async fn swedish_search() { ] })) .await; - index.wait_task(1).await; + index.wait_task(_response.uid()).await.succeeded(); // infer swedish index @@ -1286,7 +1286,7 @@ async fn german_search() { ] })) .await; - index.wait_task(1).await; + index.wait_task(_response.uid()).await.succeeded(); // infer swedish index diff --git a/crates/meilisearch/tests/search/matching_strategy.rs b/crates/meilisearch/tests/search/matching_strategy.rs index a4cb19f62..3b4325c10 100644 --- a/crates/meilisearch/tests/search/matching_strategy.rs +++ b/crates/meilisearch/tests/search/matching_strategy.rs @@ -8,8 +8,8 @@ use crate::json; async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> { let index = server.index("test"); - index.add_documents(documents.clone(), None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents.clone(), None).await; + index.wait_task(task.uid()).await.succeeded(); index } diff --git a/crates/meilisearch/tests/search/mod.rs b/crates/meilisearch/tests/search/mod.rs index 057b2b3a2..0046964fa 100644 --- a/crates/meilisearch/tests/search/mod.rs +++ b/crates/meilisearch/tests/search/mod.rs @@ -138,8 +138,8 @@ async fn phrase_search_with_stop_word() { meili_snap::snapshot!(code, @"202 Accepted"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); 
index .search(json!({"q": "how \"to\" train \"the" }), |response, code| { @@ -218,11 +218,12 @@ async fn negative_special_cases_search() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); - index.update_settings(json!({"synonyms": { "escape": ["gläss"] }})).await; - index.wait_task(1).await; + let (task, _status_code) = + index.update_settings(json!({"synonyms": { "escape": ["gläss"] }})).await; + index.wait_task(task.uid()).await.succeeded(); // There is a synonym for escape -> glass but we don't want "escape", only the derivates: glass index @@ -247,8 +248,8 @@ async fn test_kanji_language_detection() { { "id": 1, "title": "東京のお寿司。" }, { "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" } ]); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); index .search(json!({"q": "東京"}), |response, code| { @@ -270,11 +271,11 @@ async fn test_thai_language() { { "id": 1, "title": "สบู่สมุนไพรชาเขียว 100 กรัม จำนวน 6 ก้อน" }, { "id": 2, "title": "สบู่สมุนไพรฝางแดงผสมว่านหางจรเข้ 100 กรัม จำนวน 6 ก้อน" } ]); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); - index.update_settings(json!({"rankingRules": ["exactness"]})).await; - index.wait_task(1).await; + let (task, _status_code) = index.update_settings(json!({"rankingRules": ["exactness"]})).await; + index.wait_task(task.uid()).await.succeeded(); index .search(json!({"q": "สบู"}), |response, code| { @@ -329,9 +330,9 @@ async fn search_with_filter_string_notation() 
{ meili_snap::snapshot!(code, @"202 Accepted"); let documents = DOCUMENTS.clone(); - let (_, code) = index.add_documents(documents, None).await; + let (task, code) = index.add_documents(documents, None).await; meili_snap::snapshot!(code, @"202 Accepted"); - let res = index.wait_task(1).await; + let res = index.wait_task(task.uid()).await; meili_snap::snapshot!(res["status"], @r###""succeeded""###); index @@ -353,9 +354,9 @@ async fn search_with_filter_string_notation() { meili_snap::snapshot!(code, @"202 Accepted"); let documents = NESTED_DOCUMENTS.clone(); - let (_, code) = index.add_documents(documents, None).await; + let (task, code) = index.add_documents(documents, None).await; meili_snap::snapshot!(code, @"202 Accepted"); - let res = index.wait_task(3).await; + let res = index.wait_task(task.uid()).await; meili_snap::snapshot!(res["status"], @r###""succeeded""###); index @@ -607,8 +608,8 @@ async fn displayed_attributes() { index.update_settings(json!({ "displayedAttributes": ["title"] })).await; let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index.search_post(json!({ "attributesToRetrieve": ["title", "id"] })).await; @@ -622,8 +623,8 @@ async fn placeholder_search_is_hard_limited() { let index = server.index("test"); let documents: Vec<_> = (0..1200).map(|i| json!({ "id": i, "text": "I am unique!" 
})).collect(); - index.add_documents(documents.into(), None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents.into(), None).await; + index.wait_task(task.uid()).await.succeeded(); index .search( @@ -650,8 +651,9 @@ async fn placeholder_search_is_hard_limited() { ) .await; - index.update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } })).await; - index.wait_task(1).await; + let (task, _status_code) = + index.update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } })).await; + index.wait_task(task.uid()).await.succeeded(); index .search( @@ -685,8 +687,8 @@ async fn search_is_hard_limited() { let index = server.index("test"); let documents: Vec<_> = (0..1200).map(|i| json!({ "id": i, "text": "I am unique!" })).collect(); - index.add_documents(documents.into(), None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents.into(), None).await; + index.wait_task(task.uid()).await.succeeded(); index .search( @@ -715,8 +717,9 @@ async fn search_is_hard_limited() { ) .await; - index.update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } })).await; - index.wait_task(1).await; + let (task, _status_code) = + index.update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } })).await; + index.wait_task(task.uid()).await.succeeded(); index .search( @@ -754,8 +757,8 @@ async fn faceting_max_values_per_facet() { index.update_settings(json!({ "filterableAttributes": ["number"] })).await; let documents: Vec<_> = (0..10_000).map(|id| json!({ "id": id, "number": id * 10 })).collect(); - index.add_documents(json!(documents), None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(json!(documents), None).await; + index.wait_task(task.uid()).await.succeeded(); index .search( @@ -770,8 +773,9 @@ async fn faceting_max_values_per_facet() { ) .await; - index.update_settings(json!({ "faceting": { "maxValuesPerFacet": 10_000 } })).await; - 
index.wait_task(2).await; + let (task, _status_code) = + index.update_settings(json!({ "faceting": { "maxValuesPerFacet": 10_000 } })).await; + index.wait_task(task.uid()).await.succeeded(); index .search( @@ -795,7 +799,7 @@ async fn test_score_details() { let documents = DOCUMENTS.clone(); let res = index.add_documents(json!(documents), None).await; - index.wait_task(res.0.uid()).await; + index.wait_task(res.0.uid()).await.succeeded(); index .search( @@ -868,7 +872,7 @@ async fn test_score() { let documents = SCORE_DOCUMENTS.clone(); let res = index.add_documents(json!(documents), None).await; - index.wait_task(res.0.uid()).await; + index.wait_task(res.0.uid()).await.succeeded(); index .search( @@ -921,7 +925,7 @@ async fn test_score_threshold() { let documents = SCORE_DOCUMENTS.clone(); let res = index.add_documents(json!(documents), None).await; - index.wait_task(res.0.uid()).await; + index.wait_task(res.0.uid()).await.succeeded(); index .search( @@ -1077,7 +1081,7 @@ async fn test_degraded_score_details() { index.add_documents(json!(documents), None).await; // We can't really use anything else than 0ms here; otherwise, the test will get flaky. 
let (res, _code) = index.update_settings(json!({ "searchCutoffMs": 0 })).await; - index.wait_task(res.uid()).await; + index.wait_task(res.uid()).await.succeeded(); index .search( @@ -1162,8 +1166,8 @@ async fn experimental_feature_vector_store() { let documents = DOCUMENTS.clone(); - index.add_documents(json!(documents), None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(json!(documents), None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index .search_post(json!({ @@ -1369,8 +1373,8 @@ async fn camelcased_words() { { "id": 3, "title": "TestAb" }, { "id": 4, "title": "testab" }, ]); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); index .search(json!({"q": "deLonghi"}), |response, code| { @@ -1587,13 +1591,14 @@ async fn simple_search_with_strange_synonyms() { let server = Server::new().await; let index = server.index("test"); - index.update_settings(json!({ "synonyms": {"&": ["to"], "to": ["&"]} })).await; - let r = index.wait_task(0).await; + let (task, _status_code) = + index.update_settings(json!({ "synonyms": {"&": ["to"], "to": ["&"]} })).await; + let r = index.wait_task(task.uid()).await; meili_snap::snapshot!(r["status"], @r###""succeeded""###); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); index .search(json!({"q": "How to train"}), |response, code| { @@ -1679,11 +1684,12 @@ async fn change_attributes_settings() { index.update_settings(json!({ "searchableAttributes": ["father", "mother"] })).await; let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(json!(documents), None).await; - index.wait_task(1).await; + let (task, _status_code) = 
index.add_documents(json!(documents), None).await; + index.wait_task(task.uid()).await.succeeded(); - index.update_settings(json!({ "searchableAttributes": ["father", "mother", "doggos"], "filterableAttributes": ["doggos"] })).await; - index.wait_task(2).await; + let (task,_status_code) = + index.update_settings(json!({ "searchableAttributes": ["father", "mother", "doggos"], "filterableAttributes": ["doggos"] })).await; + index.wait_task(task.uid()).await.succeeded(); // search index diff --git a/crates/meilisearch/tests/search/multi.rs b/crates/meilisearch/tests/search/multi.rs index 9377f435a..5b0144d45 100644 --- a/crates/meilisearch/tests/search/multi.rs +++ b/crates/meilisearch/tests/search/multi.rs @@ -89,8 +89,8 @@ async fn simple_search_single_index() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"queries": [ @@ -161,8 +161,8 @@ async fn federation_single_search_single_index() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -208,8 +208,8 @@ async fn federation_multiple_search_single_index() { let index = server.index("test"); let documents = SCORE_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -283,8 +283,8 @@ async fn 
federation_two_search_single_index() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -351,8 +351,8 @@ async fn simple_search_missing_index_uid() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"queries": [ @@ -376,8 +376,8 @@ async fn federation_simple_search_missing_index_uid() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -401,8 +401,8 @@ async fn simple_search_illegal_index_uid() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"queries": [ @@ -426,8 +426,8 @@ async fn federation_search_illegal_index_uid() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, 
"queries": [ @@ -451,13 +451,13 @@ async fn simple_search_two_indexes() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let index = server.index("nested"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (add_task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(add_task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"queries": [ @@ -558,13 +558,13 @@ async fn federation_two_search_two_indexes() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let index = server.index("nested"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -666,18 +666,18 @@ async fn federation_multiple_search_multiple_indexes() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let index = server.index("nested"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let 
index = server.index("score"); let documents = SCORE_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(2).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -924,8 +924,8 @@ async fn search_one_index_doesnt_exist() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"queries": [ @@ -950,8 +950,8 @@ async fn federation_one_index_doesnt_exist() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -1021,13 +1021,13 @@ async fn search_one_query_error() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let index = server.index("nested"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"queries": [ @@ -1053,13 +1053,13 @@ async fn federation_one_query_error() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - 
index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let index = server.index("nested"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -1085,13 +1085,13 @@ async fn federation_one_query_sort_error() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let index = server.index("nested"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -1117,13 +1117,13 @@ async fn search_multiple_query_errors() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let index = server.index("nested"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"queries": [ @@ -1149,13 +1149,13 @@ async fn federation_multiple_query_errors() { let index = server.index("test"); let documents = 
DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let index = server.index("nested"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"queries": [ @@ -1181,13 +1181,13 @@ async fn federation_multiple_query_sort_errors() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let index = server.index("nested"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"queries": [ @@ -1213,13 +1213,13 @@ async fn federation_multiple_query_errors_interleaved() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let index = server.index("nested"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"queries": [ @@ -1246,13 +1246,13 @@ async fn 
federation_multiple_query_sort_errors_interleaved() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let index = server.index("nested"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"queries": [ @@ -1280,14 +1280,14 @@ async fn federation_filter() { let documents = FRUITS_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings( json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST"]}), ) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -1348,7 +1348,7 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() { let documents = NESTED_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -1363,7 +1363,7 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // two identical placeholder search should have all results from first query let (response, code) = server @@ -1611,7 +1611,7 @@ async fn federation_sort_same_indexes_same_criterion_opposite_direction() { let documents = NESTED_DOCUMENTS.clone(); let (value, _) = 
index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -1626,7 +1626,7 @@ async fn federation_sort_same_indexes_same_criterion_opposite_direction() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // two identical placeholder search should have all results from first query let (response, code) = server @@ -1671,7 +1671,7 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() { let documents = NESTED_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -1686,7 +1686,7 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // return mothers and fathers ordered accross fields. 
let (response, code) = server @@ -1935,7 +1935,7 @@ async fn federation_sort_same_indexes_different_criterion_opposite_direction() { let documents = NESTED_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -1950,7 +1950,7 @@ async fn federation_sort_same_indexes_different_criterion_opposite_direction() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // two identical placeholder search should have all results from first query let (response, code) = server @@ -1995,7 +1995,7 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { let documents = DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -2010,13 +2010,13 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let index = server.index("batman"); let documents = SCORE_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -2031,7 +2031,7 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // return titles ordered accross indexes let (response, code) = server @@ -2307,7 +2307,7 @@ async fn federation_sort_different_ranking_rules() { let documents = DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + 
index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -2322,13 +2322,13 @@ async fn federation_sort_different_ranking_rules() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let index = server.index("batman"); let documents = SCORE_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -2343,7 +2343,7 @@ async fn federation_sort_different_ranking_rules() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // return titles ordered accross indexes let (response, code) = server @@ -2546,7 +2546,7 @@ async fn federation_sort_different_indexes_same_criterion_opposite_direction() { let documents = DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -2561,13 +2561,13 @@ async fn federation_sort_different_indexes_same_criterion_opposite_direction() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let index = server.index("batman"); let documents = SCORE_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -2582,7 +2582,7 @@ async fn federation_sort_different_indexes_same_criterion_opposite_direction() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // all results from query 0 let (response, code) = server @@ -2628,7 +2628,7 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() let documents = 
DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -2643,13 +2643,13 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let index = server.index("batman"); let documents = SCORE_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -2664,7 +2664,7 @@ async fn federation_sort_different_indexes_different_criterion_same_direction() ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // return titles ordered accross indexes let (response, code) = server @@ -2940,7 +2940,7 @@ async fn federation_sort_different_indexes_different_criterion_opposite_directio let documents = DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -2955,13 +2955,13 @@ async fn federation_sort_different_indexes_different_criterion_opposite_directio ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let index = server.index("batman"); let documents = SCORE_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -2976,7 +2976,7 @@ async fn federation_sort_different_indexes_different_criterion_opposite_directio ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // all 
results from query 0 first let (response, code) = server @@ -3020,18 +3020,18 @@ async fn federation_limit_offset() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let index = server.index("nested"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let index = server.index("score"); let documents = SCORE_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(2).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); { let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -3338,18 +3338,18 @@ async fn federation_formatting() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let index = server.index("nested"); let documents = NESTED_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); let index = server.index("score"); let documents = SCORE_DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(2).await; + let (task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); { let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -3685,14 +3685,14 @@ async fn 
federation_invalid_weight() { let documents = FRUITS_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings( json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST"]}), ) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -3719,14 +3719,14 @@ async fn federation_null_weight() { let documents = FRUITS_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings( json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST"]}), ) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -3787,7 +3787,7 @@ async fn federation_federated_contains_pagination() { let documents = FRUITS_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // fail when a federated query contains "limit" let (response, code) = server @@ -3867,11 +3867,11 @@ async fn federation_federated_contains_facets() { ) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let documents = FRUITS_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // empty facets are actually OK let (response, code) = server @@ -3951,7 +3951,7 @@ async fn federation_non_faceted_for_an_index() { ) .await; - index.wait_task(value.uid()).await; + 
index.wait_task(value.uid()).await.succeeded(); let index = server.index("fruits-no-name"); @@ -3961,17 +3961,17 @@ async fn federation_non_faceted_for_an_index() { ) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let index = server.index("fruits-no-facets"); let (value, _) = index.update_settings(json!({"searchableAttributes": ["name"]})).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let documents = FRUITS_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // fails let (response, code) = server @@ -4071,7 +4071,7 @@ async fn federation_non_federated_contains_federation_option() { let documents = FRUITS_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // fail when a non-federated query contains "federationOptions" let (response, code) = server @@ -4116,12 +4116,12 @@ async fn federation_vector_single_index() { } }})) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let documents = VECTOR_DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // same embedder let (response, code) = server @@ -4320,12 +4320,12 @@ async fn federation_vector_two_indexes() { }, }})) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let documents = VECTOR_DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let index = server.index("vectors-sentiment"); @@ -4337,12 
+4337,12 @@ async fn federation_vector_two_indexes() { } }})) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let documents = VECTOR_DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": {}, "queries": [ @@ -4802,7 +4802,7 @@ async fn federation_facets_different_indexes_same_facet() { let documents = DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -4818,13 +4818,13 @@ async fn federation_facets_different_indexes_same_facet() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let index = server.index("batman"); let documents = SCORE_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -4840,13 +4840,13 @@ async fn federation_facets_different_indexes_same_facet() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let index = server.index("batman-2"); let documents = SCORE_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -4862,7 +4862,7 @@ async fn federation_facets_different_indexes_same_facet() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // return titles ordered accross indexes let (response, code) = server @@ -5369,7 +5369,7 @@ async fn 
federation_facets_same_indexes() { let documents = NESTED_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -5384,13 +5384,13 @@ async fn federation_facets_same_indexes() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let index = server.index("doggos-2"); let documents = NESTED_DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -5405,7 +5405,7 @@ async fn federation_facets_same_indexes() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (response, code) = server .multi_search(json!({"federation": { @@ -5670,7 +5670,7 @@ async fn federation_inconsistent_merge_order() { let documents = DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -5686,13 +5686,13 @@ async fn federation_inconsistent_merge_order() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let index = server.index("movies-2"); let documents = DOCUMENTS.clone(); let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -5711,13 +5711,13 @@ async fn federation_inconsistent_merge_order() { } })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let index = server.index("batman"); let documents = SCORE_DOCUMENTS.clone(); let (value, _) = 
index.add_documents(documents, None).await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (value, _) = index .update_settings(json!({ @@ -5733,7 +5733,7 @@ async fn federation_inconsistent_merge_order() { ] })) .await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // without merging, it works let (response, code) = server diff --git a/crates/meilisearch/tests/search/restrict_searchable.rs b/crates/meilisearch/tests/search/restrict_searchable.rs index abd13fadf..ce99c4047 100644 --- a/crates/meilisearch/tests/search/restrict_searchable.rs +++ b/crates/meilisearch/tests/search/restrict_searchable.rs @@ -64,8 +64,8 @@ async fn search_no_searchable_attribute_set() { ) .await; - index.update_settings_searchable_attributes(json!(["*"])).await; - index.wait_task(1).await; + let (task, _status_code) = index.update_settings_searchable_attributes(json!(["*"])).await; + index.wait_task(task.uid()).await.succeeded(); index .search( @@ -77,8 +77,8 @@ async fn search_no_searchable_attribute_set() { ) .await; - index.update_settings_searchable_attributes(json!(["*"])).await; - index.wait_task(2).await; + let (task, _status_code) = index.update_settings_searchable_attributes(json!(["*"])).await; + index.wait_task(task.uid()).await.succeeded(); index .search( @@ -108,8 +108,8 @@ async fn search_on_all_attributes() { async fn search_on_all_attributes_restricted_set() { let server = Server::new().await; let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; - index.update_settings_searchable_attributes(json!(["title"])).await; - index.wait_task(1).await; + let (task, _status_code) = index.update_settings_searchable_attributes(json!(["title"])).await; + index.wait_task(task.uid()).await.succeeded(); index .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["*"]}), |response, code| { @@ -191,8 +191,10 @@ async fn word_ranking_rule_order() { async fn 
word_ranking_rule_order_exact_words() { let server = Server::new().await; let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; - index.update_settings_typo_tolerance(json!({"disableOnWords": ["Captain", "Marvel"]})).await; - index.wait_task(1).await; + let (task, _status_code) = index + .update_settings_typo_tolerance(json!({"disableOnWords": ["Captain", "Marvel"]})) + .await; + index.wait_task(task.uid()).await.succeeded(); // simple search should return 2 documents (ids: 2 and 3). index @@ -358,7 +360,7 @@ async fn search_on_exact_field() { let (response, code) = index.update_settings_typo_tolerance(json!({ "disableOnAttributes": ["exact"] })).await; assert_eq!(202, code, "{:?}", response); - index.wait_task(1).await; + index.wait_task(response.uid()).await.succeeded(); // Searching on an exact attribute should only return the document matching without typo. index .search(json!({"q": "Marvel", "attributesToSearchOn": ["exact"]}), |response, code| { diff --git a/crates/meilisearch/tests/settings/distinct.rs b/crates/meilisearch/tests/settings/distinct.rs index 42c5d38bd..2c5b7517f 100644 --- a/crates/meilisearch/tests/settings/distinct.rs +++ b/crates/meilisearch/tests/settings/distinct.rs @@ -6,16 +6,16 @@ async fn set_and_reset_distinct_attribute() { let server = Server::new().await; let index = server.index("test"); - let (_response, _code) = index.update_settings(json!({ "distinctAttribute": "test"})).await; - index.wait_task(0).await; + let (task1, _code) = index.update_settings(json!({ "distinctAttribute": "test"})).await; + index.wait_task(task1.uid()).await.succeeded(); let (response, _) = index.settings().await; assert_eq!(response["distinctAttribute"], "test"); - index.update_settings(json!({ "distinctAttribute": null })).await; + let (task2, _status_code) = index.update_settings(json!({ "distinctAttribute": null })).await; - index.wait_task(1).await; + index.wait_task(task2.uid()).await.succeeded(); let (response, _) = 
index.settings().await; @@ -27,16 +27,16 @@ async fn set_and_reset_distinct_attribute_with_dedicated_route() { let server = Server::new().await; let index = server.index("test"); - let (_response, _code) = index.update_distinct_attribute(json!("test")).await; - index.wait_task(0).await; + let (update_task1, _code) = index.update_distinct_attribute(json!("test")).await; + index.wait_task(update_task1.uid()).await.succeeded(); let (response, _) = index.get_distinct_attribute().await; assert_eq!(response, "test"); - index.update_distinct_attribute(json!(null)).await; + let (update_task2, _status_code) = index.update_distinct_attribute(json!(null)).await; - index.wait_task(1).await; + index.wait_task(update_task2.uid()).await.succeeded(); let (response, _) = index.get_distinct_attribute().await; diff --git a/crates/meilisearch/tests/settings/get_settings.rs b/crates/meilisearch/tests/settings/get_settings.rs index 55d9441ee..b09867572 100644 --- a/crates/meilisearch/tests/settings/get_settings.rs +++ b/crates/meilisearch/tests/settings/get_settings.rs @@ -193,7 +193,7 @@ async fn get_settings() { let server = Server::new().await; let index = server.index("test"); let (response, _code) = index.create(None).await; - index.wait_task(response.uid()).await; + index.wait_task(response.uid()).await.succeeded(); let (response, code) = index.settings().await; assert_eq!(code, 200); let settings = response.as_object().unwrap(); @@ -250,7 +250,7 @@ async fn secrets_are_hidden_in_settings() { let index = server.index("test"); let (response, _code) = index.create(None).await; - index.wait_task(response.uid()).await; + index.wait_task(response.uid()).await.succeeded(); let (response, code) = index .update_settings(json!({ @@ -285,7 +285,7 @@ async fn secrets_are_hidden_in_settings() { let (response, code) = index.settings().await; meili_snap::snapshot!(code, @"200 OK"); - meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + 
meili_snap::snapshot!(meili_snap::json_string!(response), @r#" { "displayedAttributes": [ "*" @@ -346,11 +346,11 @@ async fn secrets_are_hidden_in_settings() { "facetSearch": true, "prefixSearch": "indexingTime" } - "###); + "#); let (response, code) = server.get_task(settings_update_uid).await; meili_snap::snapshot!(code, @"200 OK"); - meili_snap::snapshot!(meili_snap::json_string!(response["details"]), @r###" + meili_snap::snapshot!(meili_snap::json_string!(response["details"]), @r#" { "embedders": { "default": { @@ -363,7 +363,7 @@ async fn secrets_are_hidden_in_settings() { } } } - "###); + "#); } #[actix_rt::test] @@ -378,15 +378,15 @@ async fn error_update_settings_unknown_field() { async fn test_partial_update() { let server = Server::new().await; let index = server.index("test"); - let (_response, _code) = index.update_settings(json!({"displayedAttributes": ["foo"]})).await; - index.wait_task(0).await; + let (task, _code) = index.update_settings(json!({"displayedAttributes": ["foo"]})).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index.settings().await; assert_eq!(code, 200); assert_eq!(response["displayedAttributes"], json!(["foo"])); assert_eq!(response["searchableAttributes"], json!(["*"])); - let (_response, _) = index.update_settings(json!({"searchableAttributes": ["bar"]})).await; - index.wait_task(1).await; + let (task, _) = index.update_settings(json!({"searchableAttributes": ["bar"]})).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index.settings().await; assert_eq!(code, 200); @@ -398,10 +398,10 @@ async fn test_partial_update() { async fn error_delete_settings_unexisting_index() { let server = Server::new().await; let index = server.index("test"); - let (_response, code) = index.delete_settings().await; + let (task, code) = index.delete_settings().await; assert_eq!(code, 202); - let response = index.wait_task(0).await; + let response = index.wait_task(task.uid()).await; 
assert_eq!(response["status"], "failed"); } @@ -422,12 +422,12 @@ async fn reset_all_settings() { let (response, code) = index.add_documents(documents, None).await; assert_eq!(code, 202); assert_eq!(response["taskUid"], 0); - index.wait_task(0).await; + index.wait_task(response.uid()).await.succeeded(); - index + let (update_task,_status_code) = index .update_settings(json!({"displayedAttributes": ["name", "age"], "searchableAttributes": ["name"], "stopWords": ["the"], "filterableAttributes": ["age"], "synonyms": {"puppy": ["dog", "doggo", "potat"] }})) .await; - index.wait_task(1).await; + index.wait_task(update_task.uid()).await.succeeded(); let (response, code) = index.settings().await; assert_eq!(code, 200); assert_eq!(response["displayedAttributes"], json!(["name", "age"])); @@ -436,8 +436,8 @@ async fn reset_all_settings() { assert_eq!(response["synonyms"], json!({"puppy": ["dog", "doggo", "potat"] })); assert_eq!(response["filterableAttributes"], json!(["age"])); - index.delete_settings().await; - index.wait_task(2).await; + let (delete_task, _status_code) = index.delete_settings().await; + index.wait_task(delete_task.uid()).await.succeeded(); let (response, code) = index.settings().await; assert_eq!(code, 200); @@ -456,14 +456,14 @@ async fn reset_all_settings() { async fn update_setting_unexisting_index() { let server = Server::new().await; let index = server.index("test"); - let (_response, code) = index.update_settings(json!({})).await; + let (task, code) = index.update_settings(json!({})).await; assert_eq!(code, 202); - let response = index.wait_task(0).await; + let response = index.wait_task(task.uid()).await; assert_eq!(response["status"], "succeeded"); let (_response, code) = index.get().await; assert_eq!(code, 200); - index.delete_settings().await; - let response = index.wait_task(1).await; + let (task, _status_code) = index.delete_settings().await; + let response = index.wait_task(task.uid()).await; assert_eq!(response["status"], "succeeded"); } @@ 
-506,16 +506,16 @@ async fn set_and_reset_distinct_attribute_with_dedicated_route() { let server = Server::new().await; let index = server.index("test"); - let (_response, _code) = index.update_distinct_attribute(json!("test")).await; - index.wait_task(0).await; + let (task, _code) = index.update_distinct_attribute(json!("test")).await; + index.wait_task(task.uid()).await.succeeded(); let (response, _) = index.get_distinct_attribute().await; assert_eq!(response, "test"); - index.update_distinct_attribute(json!(null)).await; + let (task, _status_code) = index.update_distinct_attribute(json!(null)).await; - index.wait_task(1).await; + index.wait_task(task.uid()).await.succeeded(); let (response, _) = index.get_distinct_attribute().await; diff --git a/crates/meilisearch/tests/settings/proximity_settings.rs b/crates/meilisearch/tests/settings/proximity_settings.rs index 8b206ded4..c5897bc51 100644 --- a/crates/meilisearch/tests/settings/proximity_settings.rs +++ b/crates/meilisearch/tests/settings/proximity_settings.rs @@ -29,8 +29,8 @@ async fn attribute_scale_search() { let server = Server::new().await; let index = server.index("test"); - index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index .update_settings(json!({ @@ -39,7 +39,7 @@ async fn attribute_scale_search() { })) .await; assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(1).await; + index.wait_task(response.uid()).await.succeeded(); // the expected order is [1, 3, 2] instead of [3, 1, 2] // because the attribute scale doesn't make the difference between 1 and 3. 
@@ -102,16 +102,16 @@ async fn attribute_scale_phrase_search() { let server = Server::new().await; let index = server.index("test"); - index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(task.uid()).await.succeeded(); - let (_response, _code) = index + let (task, _code) = index .update_settings(json!({ "proximityPrecision": "byAttribute", "rankingRules": ["words", "typo", "proximity"], })) .await; - index.wait_task(1).await; + index.wait_task(task.uid()).await.succeeded(); // the expected order is [1, 3] instead of [3, 1] // because the attribute scale doesn't make the difference between 1 and 3. @@ -170,25 +170,25 @@ async fn word_scale_set_and_reset() { let server = Server::new().await; let index = server.index("test"); - index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(task.uid()).await.succeeded(); // Set and reset the setting ensuring the swap between the 2 settings is applied. 
- let (_response, _code) = index + let (update_task1, _code) = index .update_settings(json!({ "proximityPrecision": "byAttribute", "rankingRules": ["words", "typo", "proximity"], })) .await; - index.wait_task(1).await; + index.wait_task(update_task1.uid()).await.succeeded(); - let (_response, _code) = index + let (update_task2, _code) = index .update_settings(json!({ "proximityPrecision": "byWord", "rankingRules": ["words", "typo", "proximity"], })) .await; - index.wait_task(2).await; + index.wait_task(update_task2.uid()).await.succeeded(); // [3, 1, 2] index @@ -285,8 +285,8 @@ async fn attribute_scale_default_ranking_rules() { let server = Server::new().await; let index = server.index("test"); - index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(0).await; + let (task, _status_code) = index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index .update_settings(json!({ @@ -294,7 +294,7 @@ async fn attribute_scale_default_ranking_rules() { })) .await; assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(1).await; + index.wait_task(response.uid()).await.succeeded(); // the expected order is [3, 1, 2] index diff --git a/crates/meilisearch/tests/settings/tokenizer_customization.rs b/crates/meilisearch/tests/settings/tokenizer_customization.rs index 4602e31f7..190918b34 100644 --- a/crates/meilisearch/tests/settings/tokenizer_customization.rs +++ b/crates/meilisearch/tests/settings/tokenizer_customization.rs @@ -8,14 +8,14 @@ async fn set_and_reset() { let server = Server::new().await; let index = server.index("test"); - let (_response, _code) = index + let (task, _code) = index .update_settings(json!({ "nonSeparatorTokens": ["#", "&"], "separatorTokens": ["&sep", "
"], "dictionary": ["J.R.R.", "J. R. R."], })) .await; - index.wait_task(0).await; + index.wait_task(task.uid()).await.succeeded(); let (response, _) = index.settings().await; snapshot!(json_string!(response["nonSeparatorTokens"]), @r###" @@ -37,7 +37,7 @@ async fn set_and_reset() { ] "###); - index + let (task, _status_code) = index .update_settings(json!({ "nonSeparatorTokens": null, "separatorTokens": null, @@ -45,7 +45,7 @@ async fn set_and_reset() { })) .await; - index.wait_task(1).await; + index.wait_task(task.uid()).await.succeeded(); let (response, _) = index.settings().await; snapshot!(json_string!(response["nonSeparatorTokens"]), @"[]"); @@ -73,17 +73,17 @@ async fn set_and_search() { let server = Server::new().await; let index = server.index("test"); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (add_task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(add_task.uid()).await.succeeded(); - let (_response, _code) = index + let (update_task, _code) = index .update_settings(json!({ "nonSeparatorTokens": ["#", "&"], "separatorTokens": ["
", "&sep"], "dictionary": ["#", "A#", "B#", "C#", "D#", "E#", "F#", "G#"], })) .await; - index.wait_task(1).await; + index.wait_task(update_task.uid()).await.succeeded(); index .search(json!({"q": "&", "attributesToHighlight": ["content"]}), |response, code| { @@ -227,10 +227,10 @@ async fn advanced_synergies() { let server = Server::new().await; let index = server.index("test"); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (add_task, _status_code) = index.add_documents(documents, None).await; + index.wait_task(add_task.uid()).await.succeeded(); - let (_response, _code) = index + let (update_task, _code) = index .update_settings(json!({ "dictionary": ["J.R.R.", "J. R. R."], "synonyms": { @@ -243,7 +243,7 @@ async fn advanced_synergies() { } })) .await; - index.wait_task(1).await; + index.wait_task(update_task.uid()).await.succeeded(); index .search(json!({"q": "J.R.R.", "attributesToHighlight": ["content"]}), |response, code| { @@ -353,7 +353,7 @@ async fn advanced_synergies() { "dictionary": ["J.R.R.", "J. R. R.", "J.K.", "J. 
K."], })) .await; - index.wait_task(2).await; + index.wait_task(_response.uid()).await.succeeded(); index .search(json!({"q": "jk", "attributesToHighlight": ["content"]}), |response, code| { diff --git a/crates/meilisearch/tests/similar/errors.rs b/crates/meilisearch/tests/similar/errors.rs index 86fca97ad..c19c0b654 100644 --- a/crates/meilisearch/tests/similar/errors.rs +++ b/crates/meilisearch/tests/similar/errors.rs @@ -324,7 +324,7 @@ async fn similar_bad_filter() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (response, code) = index.similar_post(json!({ "id": 287947, "filter": true, "embedder": "manual" })).await; @@ -362,7 +362,7 @@ async fn filter_invalid_syntax_object() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); index .similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| { @@ -401,7 +401,7 @@ async fn filter_invalid_syntax_array() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); index .similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| { @@ -440,7 +440,7 @@ async fn filter_invalid_syntax_string() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let expected_response = json!({ "message": "Found unexpected characters at the end of the filter: `XOR title = Glass`. 
You probably forgot an `OR` or an `AND` rule.\n15:32 title = Glass XOR title = Glass", @@ -481,7 +481,7 @@ async fn filter_invalid_attribute_array() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let expected_response = json!({ "message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", @@ -522,7 +522,7 @@ async fn filter_invalid_attribute_string() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let expected_response = json!({ "message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", @@ -563,7 +563,7 @@ async fn filter_reserved_geo_attribute_array() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let expected_response = json!({ "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass", @@ -604,7 +604,7 @@ async fn filter_reserved_geo_attribute_string() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let expected_response = json!({ "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. 
Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass", @@ -645,7 +645,7 @@ async fn filter_reserved_attribute_array() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let expected_response = json!({ "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass", @@ -686,7 +686,7 @@ async fn filter_reserved_attribute_string() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let expected_response = json!({ "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass", @@ -727,7 +727,7 @@ async fn filter_reserved_geo_point_array() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let expected_response = json!({ "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. 
Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass", @@ -768,7 +768,7 @@ async fn filter_reserved_geo_point_string() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let expected_response = json!({ "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass", diff --git a/crates/meilisearch/tests/similar/mod.rs b/crates/meilisearch/tests/similar/mod.rs index fa0797a41..71518f04c 100644 --- a/crates/meilisearch/tests/similar/mod.rs +++ b/crates/meilisearch/tests/similar/mod.rs @@ -77,7 +77,7 @@ async fn basic() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); index .similar( @@ -274,7 +274,7 @@ async fn ranking_score_threshold() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); index .similar( @@ -555,7 +555,7 @@ async fn filter() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); index .similar( @@ -684,7 +684,7 @@ async fn limit_and_offset() { let documents = DOCUMENTS.clone(); let (value, code) = index.add_documents(documents, 
None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); index .similar( diff --git a/crates/meilisearch/tests/snapshot/mod.rs b/crates/meilisearch/tests/snapshot/mod.rs index 0d569fc7c..0f3417cdf 100644 --- a/crates/meilisearch/tests/snapshot/mod.rs +++ b/crates/meilisearch/tests/snapshot/mod.rs @@ -56,7 +56,7 @@ async fn perform_snapshot() { let (task, code) = server.index("test1").create(Some("prim")).await; meili_snap::snapshot!(code, @"202 Accepted"); - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); // wait for the _next task_ to process, aka the snapshot that should be enqueued at some point @@ -129,10 +129,10 @@ async fn perform_on_demand_snapshot() { index.load_test_set().await; - let (task, _) = server.index("doggo").create(Some("bone")).await; + let (task, _status_code) = server.index("doggo").create(Some("bone")).await; index.wait_task(task.uid()).await.succeeded(); - let (task, _) = server.index("doggo").create(Some("bone")).await; + let (task, _status_code) = server.index("doggo").create(Some("bone")).await; index.wait_task(task.uid()).await.failed(); let (task, code) = server.create_snapshot().await; diff --git a/crates/meilisearch/tests/stats/mod.rs b/crates/meilisearch/tests/stats/mod.rs index a02a48a87..1b4e458d3 100644 --- a/crates/meilisearch/tests/stats/mod.rs +++ b/crates/meilisearch/tests/stats/mod.rs @@ -28,10 +28,10 @@ async fn test_healthyness() { async fn stats() { let server = Server::new().await; let index = server.index("test"); - let (_, code) = index.create(Some("id")).await; + let (task, code) = index.create(Some("id")).await; assert_eq!(code, 202); - index.wait_task(0).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = server.stats().await; @@ -57,7 +57,7 @@ async fn stats() { assert_eq!(code, 202, "{}", response); assert_eq!(response["taskUid"], 1); - index.wait_task(1).await; + 
index.wait_task(response.uid()).await.succeeded(); let timestamp = OffsetDateTime::now_utc(); let (response, code) = server.stats().await; diff --git a/crates/meilisearch/tests/swap_indexes/mod.rs b/crates/meilisearch/tests/swap_indexes/mod.rs index 646f773d8..bf84d5823 100644 --- a/crates/meilisearch/tests/swap_indexes/mod.rs +++ b/crates/meilisearch/tests/swap_indexes/mod.rs @@ -15,7 +15,7 @@ async fn swap_indexes() { let (res, code) = b.add_documents(json!({ "id": 1, "index": "b"}), None).await; snapshot!(code, @"202 Accepted"); snapshot!(res["taskUid"], @"1"); - server.wait_task(1).await; + server.wait_task(res.uid()).await; let (tasks, code) = server.tasks().await; snapshot!(code, @"200 OK"); @@ -67,7 +67,7 @@ async fn swap_indexes() { let (res, code) = server.index_swap(json!([{ "indexes": ["a", "b"] }])).await; snapshot!(code, @"202 Accepted"); snapshot!(res["taskUid"], @"2"); - server.wait_task(2).await; + server.wait_task(res.uid()).await; let (tasks, code) = server.tasks().await; snapshot!(code, @"200 OK"); @@ -159,7 +159,7 @@ async fn swap_indexes() { let (res, code) = d.add_documents(json!({ "id": 1, "index": "d"}), None).await; snapshot!(code, @"202 Accepted"); snapshot!(res["taskUid"], @"4"); - server.wait_task(4).await; + server.wait_task(res.uid()).await; // ensure the index creation worked properly let (tasks, code) = server.tasks_filter("limit=2").await; @@ -215,7 +215,7 @@ async fn swap_indexes() { server.index_swap(json!([{ "indexes": ["a", "b"] }, { "indexes": ["c", "d"] } ])).await; snapshot!(res["taskUid"], @"5"); snapshot!(code, @"202 Accepted"); - server.wait_task(5).await; + server.wait_task(res.uid()).await; // ensure the index creation worked properly let (tasks, code) = server.tasks().await; diff --git a/crates/meilisearch/tests/tasks/mod.rs b/crates/meilisearch/tests/tasks/mod.rs index c9d3f31ed..f432ef7db 100644 --- a/crates/meilisearch/tests/tasks/mod.rs +++ b/crates/meilisearch/tests/tasks/mod.rs @@ -13,8 +13,8 @@ use crate::json; 
async fn error_get_unexisting_task_status() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index.wait_task(0).await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); let (response, code) = index.get_task(1).await; let expected_response = json!({ @@ -32,8 +32,8 @@ async fn error_get_unexisting_task_status() { async fn get_task_status() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index + let (create_task, _status_code) = index.create(None).await; + let (add_task, _status_code) = index .add_documents( json!([{ "id": 1, @@ -42,8 +42,8 @@ async fn get_task_status() { None, ) .await; - index.wait_task(0).await; - let (_response, code) = index.get_task(1).await; + index.wait_task(create_task.uid()).await.succeeded(); + let (_response, code) = index.get_task(add_task.uid()).await; assert_eq!(code, 200); // TODO check response format, as per #48 } @@ -52,8 +52,8 @@ async fn get_task_status() { async fn list_tasks() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index.wait_task(0).await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); index .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; @@ -105,7 +105,7 @@ async fn list_tasks_with_star_filters() { let server = Server::new().await; let index = server.index("test"); let (task, _code) = index.create(None).await; - index.wait_task(task.uid()).await; + index.wait_task(task.uid()).await.succeeded(); index .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; @@ -149,25 +149,21 @@ async fn list_tasks_with_star_filters() { async fn list_tasks_status_filtered() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - 
index.wait_task(0).await; - index - .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) - .await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.failed(); let (response, code) = index.filtered_tasks(&[], &["succeeded"], &[]).await; assert_eq!(code, 200, "{}", response); assert_eq!(response["results"].as_array().unwrap().len(), 1); - // We can't be sure that the update isn't already processed so we can't test this - // let (response, code) = index.filtered_tasks(&[], &["processing"]).await; - // assert_eq!(code, 200, "{}", response); - // assert_eq!(response["results"].as_array().unwrap().len(), 1); - - index.wait_task(1).await; - let (response, code) = index.filtered_tasks(&[], &["succeeded"], &[]).await; assert_eq!(code, 200, "{}", response); + assert_eq!(response["results"].as_array().unwrap().len(), 1); + + let (response, code) = index.filtered_tasks(&[], &["succeeded", "failed"], &[]).await; + assert_eq!(code, 200, "{}", response); assert_eq!(response["results"].as_array().unwrap().len(), 2); } @@ -175,8 +171,8 @@ async fn list_tasks_status_filtered() { async fn list_tasks_type_filtered() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index.wait_task(0).await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); index .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; @@ -195,8 +191,8 @@ async fn list_tasks_type_filtered() { async fn list_tasks_invalid_canceled_by_filter() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index.wait_task(0).await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); index 
.add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; @@ -210,8 +206,8 @@ async fn list_tasks_invalid_canceled_by_filter() { async fn list_tasks_status_and_type_filtered() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index.wait_task(0).await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); index .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; @@ -278,8 +274,9 @@ async fn test_summarized_task_view() { async fn test_summarized_document_addition_or_update() { let server = Server::new().await; let index = server.index("test"); - index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), None).await; - index.wait_task(0).await; + let (task, _status_code) = + index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), None).await; + index.wait_task(task.uid()).await.succeeded(); let (task, _) = index.get_task(0).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -303,8 +300,9 @@ async fn test_summarized_document_addition_or_update() { } "###); - index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await; - index.wait_task(1).await; + let (task, _status_code) = + index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await; + index.wait_task(task.uid()).await.succeeded(); let (task, _) = index.get_task(1).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -333,8 +331,8 @@ async fn test_summarized_document_addition_or_update() { async fn test_summarized_delete_documents_by_batch() { let server = Server::new().await; let index = server.index("test"); - index.delete_batch(vec![1, 2, 3]).await; - 
index.wait_task(0).await; + let (task, _status_code) = index.delete_batch(vec![1, 2, 3]).await; + index.wait_task(task.uid()).await.failed(); let (task, _) = index.get_task(0).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @@ -365,9 +363,9 @@ async fn test_summarized_delete_documents_by_batch() { "###); index.create(None).await; - index.delete_batch(vec![42]).await; - index.wait_task(2).await; - let (task, _) = index.get_task(2).await; + let (del_task, _status_code) = index.delete_batch(vec![42]).await; + index.wait_task(del_task.uid()).await.succeeded(); + let (task, _) = index.get_task(del_task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -397,9 +395,10 @@ async fn test_summarized_delete_documents_by_filter() { let server = Server::new().await; let index = server.index("test"); - index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; - index.wait_task(0).await; - let (task, _) = index.get_task(0).await; + let (task, _status_code) = + index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; + index.wait_task(task.uid()).await.failed(); + let (task, _) = index.get_task(task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -429,9 +428,10 @@ async fn test_summarized_delete_documents_by_filter() { "###); index.create(None).await; - index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; - index.wait_task(2).await; - let (task, _) = index.get_task(2).await; + let (task, _status_code) = + index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; + index.wait_task(task.uid()).await.failed(); + let (task, _) = index.get_task(task.uid()).await; 
assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -461,9 +461,10 @@ async fn test_summarized_delete_documents_by_filter() { "###); index.update_settings(json!({ "filterableAttributes": ["doggo"] })).await; - index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; - index.wait_task(4).await; - let (task, _) = index.get_task(4).await; + let (task, _status_code) = + index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; + index.wait_task(task.uid()).await.succeeded(); + let (task, _) = index.get_task(task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -492,9 +493,9 @@ async fn test_summarized_delete_documents_by_filter() { async fn test_summarized_delete_document_by_id() { let server = Server::new().await; let index = server.index("test"); - index.delete_document(1).await; - index.wait_task(0).await; - let (task, _) = index.get_task(0).await; + let (task, _status_code) = index.delete_document(1).await; + index.wait_task(task.uid()).await.failed(); + let (task, _) = index.get_task(task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -524,9 +525,9 @@ async fn test_summarized_delete_document_by_id() { "###); index.create(None).await; - index.delete_document(42).await; - index.wait_task(2).await; - let (task, _) = index.get_task(2).await; + let (task, _status_code) = index.delete_document(42).await; + index.wait_task(task.uid()).await.succeeded(); + let (task, _) = index.get_task(task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -567,9 +568,9 @@ async fn 
test_summarized_settings_update() { } "###); - index.update_settings(json!({ "displayedAttributes": ["doggos", "name"], "filterableAttributes": ["age", "nb_paw_pads"], "sortableAttributes": ["iq"] })).await; - index.wait_task(0).await; - let (task, _) = index.get_task(0).await; + let (task,_status_code) = index.update_settings(json!({ "displayedAttributes": ["doggos", "name"], "filterableAttributes": ["age", "nb_paw_pads"], "sortableAttributes": ["iq"] })).await; + index.wait_task(task.uid()).await.succeeded(); + let (task, _) = index.get_task(task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -606,9 +607,9 @@ async fn test_summarized_settings_update() { async fn test_summarized_index_creation() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index.wait_task(0).await; - let (task, _) = index.get_task(0).await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task, _) = index.get_task(task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -630,9 +631,9 @@ async fn test_summarized_index_creation() { } "###); - index.create(Some("doggos")).await; - index.wait_task(1).await; - let (task, _) = index.get_task(1).await; + let (task, _status_code) = index.create(Some("doggos")).await; + index.wait_task(task.uid()).await.failed(); + let (task, _) = index.get_task(task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -774,9 +775,9 @@ async fn test_summarized_index_update() { let server = Server::new().await; let index = server.index("test"); // If the index doesn't exist yet, we should get errors with or without the 
primary key. - index.update(None).await; - index.wait_task(0).await; - let (task, _) = index.get_task(0).await; + let (task, _status_code) = index.update(None).await; + index.wait_task(task.uid()).await.failed(); + let (task, _) = index.get_task(task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -803,9 +804,9 @@ async fn test_summarized_index_update() { } "###); - index.update(Some("bones")).await; - index.wait_task(1).await; - let (task, _) = index.get_task(1).await; + let (task, _status_code) = index.update(Some("bones")).await; + index.wait_task(task.uid()).await.failed(); + let (task, _) = index.get_task(task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -835,9 +836,9 @@ async fn test_summarized_index_update() { // And run the same two tests once the index do exists. 
index.create(None).await; - index.update(None).await; - index.wait_task(3).await; - let (task, _) = index.get_task(3).await; + let (task, _status_code) = index.update(None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task, _) = index.get_task(task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -859,9 +860,9 @@ async fn test_summarized_index_update() { } "###); - index.update(Some("bones")).await; - index.wait_task(4).await; - let (task, _) = index.get_task(4).await; + let (task, _status_code) = index.update(Some("bones")).await; + index.wait_task(task.uid()).await.succeeded(); + let (task, _) = index.get_task(task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -887,13 +888,13 @@ async fn test_summarized_index_update() { #[actix_web::test] async fn test_summarized_index_swap() { let server = Server::new().await; - server + let (task, _status_code) = server .index_swap(json!([ { "indexes": ["doggos", "cattos"] } ])) .await; - server.wait_task(0).await; - let (task, _) = server.get_task(0).await; + server.wait_task(task.uid()).await.failed(); + let (task, _) = server.get_task(task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -927,15 +928,17 @@ async fn test_summarized_index_swap() { } "###); - server.index("doggos").create(None).await; - server.index("cattos").create(None).await; - server + let (task, _code) = server.index("doggos").create(None).await; + server.wait_task(task.uid()).await.succeeded(); + let (task, _code) = server.index("cattos").create(None).await; + server.wait_task(task.uid()).await.succeeded(); + let (task, _code) = server .index_swap(json!([ { "indexes": ["doggos", "cattos"] } 
])) .await; - server.wait_task(3).await; - let (task, _) = server.get_task(3).await; + server.wait_task(task.uid()).await.succeeded(); + let (task, _) = server.get_task(task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -970,11 +973,11 @@ async fn test_summarized_task_cancelation() { let server = Server::new().await; let index = server.index("doggos"); // to avoid being flaky we're only going to cancel an already finished task :( - index.create(None).await; - index.wait_task(0).await; - server.cancel_tasks("uids=0").await; - index.wait_task(1).await; - let (task, _) = index.get_task(1).await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = server.cancel_tasks("uids=0").await; + index.wait_task(task.uid()).await.succeeded(); + let (task, _) = index.get_task(task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -1004,11 +1007,11 @@ async fn test_summarized_task_deletion() { let server = Server::new().await; let index = server.index("doggos"); // to avoid being flaky we're only going to delete an already finished task :( - index.create(None).await; - index.wait_task(0).await; - server.delete_tasks("uids=0").await; - index.wait_task(1).await; - let (task, _) = index.get_task(1).await; + let (task, _status_code) = index.create(None).await; + index.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = server.delete_tasks("uids=0").await; + index.wait_task(task.uid()).await.succeeded(); + let (task, _) = index.get_task(task.uid()).await; assert_json_snapshot!(task, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" @@ -1036,9 +1039,9 @@ async fn 
test_summarized_task_deletion() { #[actix_web::test] async fn test_summarized_dump_creation() { let server = Server::new().await; - server.create_dump().await; - server.wait_task(0).await; - let (task, _) = server.get_task(0).await; + let (task, _status_code) = server.create_dump().await; + server.wait_task(task.uid()).await; + let (task, _) = server.get_task(task.uid()).await; assert_json_snapshot!(task, { ".details.dumpUid" => "[dumpUid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, @r###" diff --git a/crates/meilisearch/tests/vector/binary_quantized.rs b/crates/meilisearch/tests/vector/binary_quantized.rs index 790df5459..266f84f7d 100644 --- a/crates/meilisearch/tests/vector/binary_quantized.rs +++ b/crates/meilisearch/tests/vector/binary_quantized.rs @@ -35,7 +35,7 @@ async fn retrieve_binary_quantize_status_in_the_settings() { let (settings, code) = index.settings().await; snapshot!(code, @"200 OK"); - snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3}"###); + snapshot!(settings["embedders"]["manual"], @r#"{"source":"userProvided","dimensions":3}"#); let (response, code) = index .update_settings(json!({ @@ -53,7 +53,7 @@ async fn retrieve_binary_quantize_status_in_the_settings() { let (settings, code) = index.settings().await; snapshot!(code, @"200 OK"); - snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":false}"###); + snapshot!(settings["embedders"]["manual"], @r#"{"source":"userProvided","dimensions":3,"binaryQuantized":false}"#); let (response, code) = index .update_settings(json!({ @@ -71,7 +71,7 @@ async fn retrieve_binary_quantize_status_in_the_settings() { let (settings, code) = index.settings().await; snapshot!(code, @"200 OK"); - snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":true}"###); + 
snapshot!(settings["embedders"]["manual"], @r#"{"source":"userProvided","dimensions":3,"binaryQuantized":true}"#); } #[actix_rt::test] @@ -300,7 +300,7 @@ async fn try_to_disable_binary_quantization() { .await; snapshot!(code, @"202 Accepted"); let ret = server.wait_task(response.uid()).await; - snapshot!(ret, @r###" + snapshot!(ret, @r#" { "uid": "[uid]", "batchUid": "[batch_uid]", @@ -328,7 +328,7 @@ async fn try_to_disable_binary_quantization() { "startedAt": "[date]", "finishedAt": "[date]" } - "###); + "#); } #[actix_rt::test] diff --git a/crates/meilisearch/tests/vector/mod.rs b/crates/meilisearch/tests/vector/mod.rs index adad9fa81..86c865384 100644 --- a/crates/meilisearch/tests/vector/mod.rs +++ b/crates/meilisearch/tests/vector/mod.rs @@ -55,7 +55,7 @@ async fn add_remove_user_provided() { })) .await; snapshot!(code, @"202 Accepted"); - server.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); let documents = json!([ {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }}, @@ -63,7 +63,7 @@ async fn add_remove_user_provided() { ]); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (documents, _code) = index .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) @@ -116,7 +116,7 @@ async fn add_remove_user_provided() { ]); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); let (documents, _code) = index .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) @@ -159,7 +159,7 @@ async fn add_remove_user_provided() { let (value, code) = index.delete_document(0).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + 
index.wait_task(value.uid()).await.succeeded(); let (documents, _code) = index .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) @@ -221,7 +221,7 @@ async fn generate_default_user_provided_documents(server: &Server) -> Index { ]); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); index } @@ -618,7 +618,7 @@ async fn clear_documents() { let index = generate_default_user_provided_documents(&server).await; let (value, _code) = index.clear_all_documents().await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // Make sure the documents DB has been cleared let (documents, _code) = index diff --git a/crates/meilisearch/tests/vector/settings.rs b/crates/meilisearch/tests/vector/settings.rs index 027c55219..85b7d2b7f 100644 --- a/crates/meilisearch/tests/vector/settings.rs +++ b/crates/meilisearch/tests/vector/settings.rs @@ -73,7 +73,7 @@ async fn update_embedder() { })) .await; snapshot!(code, @"202 Accepted"); - server.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); let (response, code) = index .update_settings(json!({ diff --git a/crates/meilitool/Cargo.toml b/crates/meilitool/Cargo.toml index 7d0b9f32c..ffd13da34 100644 --- a/crates/meilitool/Cargo.toml +++ b/crates/meilitool/Cargo.toml @@ -9,16 +9,16 @@ edition.workspace = true license.workspace = true [dependencies] -anyhow = "1.0.86" +anyhow = "1.0.95" arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" } -clap = { version = "4.5.9", features = ["derive"] } +clap = { version = "4.5.24", features = ["derive"] } dump = { path = "../dump" } file-store = { path = "../file-store" } -indexmap = {version = "2.7.0", features = ["serde"]} +indexmap = { version = "2.7.0", features = ["serde"] } 
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } -serde = { version = "1.0.209", features = ["derive"] } -serde_json = {version = "1.0.133", features = ["preserve_order"]} -tempfile = "3.14.0" -time = { version = "0.3.36", features = ["formatting", "parsing", "alloc"] } -uuid = { version = "1.10.0", features = ["v4"], default-features = false } +serde = { version = "1.0.217", features = ["derive"] } +serde_json = { version = "1.0.135", features = ["preserve_order"] } +tempfile = "3.15.0" +time = { version = "0.3.37", features = ["formatting", "parsing", "alloc"] } +uuid = { version = "1.11.0", features = ["v4"], default-features = false } diff --git a/crates/meilitool/src/upgrade/mod.rs b/crates/meilitool/src/upgrade/mod.rs index 14f941311..47ca2cbd9 100644 --- a/crates/meilitool/src/upgrade/mod.rs +++ b/crates/meilitool/src/upgrade/mod.rs @@ -8,7 +8,7 @@ use std::path::{Path, PathBuf}; use anyhow::{bail, Context}; use meilisearch_types::versioning::create_version_file; use v1_10::v1_9_to_v1_10; -use v1_12::v1_11_to_v1_12; +use v1_12::{v1_11_to_v1_12, v1_12_to_v1_12_3}; use crate::upgrade::v1_11::v1_10_to_v1_11; @@ -21,9 +21,15 @@ pub struct OfflineUpgrade { impl OfflineUpgrade { pub fn upgrade(self) -> anyhow::Result<()> { let upgrade_list = [ - (v1_9_to_v1_10 as fn(&Path) -> Result<(), anyhow::Error>, "1", "10", "0"), + ( + v1_9_to_v1_10 as fn(&Path, &str, &str, &str) -> Result<(), anyhow::Error>, + "1", + "10", + "0", + ), (v1_10_to_v1_11, "1", "11", "0"), (v1_11_to_v1_12, "1", "12", "0"), + (v1_12_to_v1_12_3, "1", "12", "3"), ]; let (current_major, current_minor, current_patch) = &self.current_version; @@ -36,6 +42,7 @@ impl OfflineUpgrade { ("1", "9", _) => 0, ("1", "10", _) => 1, ("1", "11", _) => 2, + ("1", "12", x) if x == "0" || x == "1" || x == "2" => 3, _ => { bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. 
Can only upgrade from v1.9 and v1.10") } @@ -46,7 +53,8 @@ impl OfflineUpgrade { let ends_at = match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) { ("1", "10", _) => 0, ("1", "11", _) => 1, - ("1", "12", _) => 2, + ("1", "12", x) if x == "0" || x == "1" || x == "2" => 2, + ("1", "12", "3") => 3, (major, _, _) if major.starts_with('v') => { bail!("Target version must not starts with a `v`. Instead of writing `v1.9.0` write `1.9.0` for example.") } @@ -60,7 +68,7 @@ impl OfflineUpgrade { #[allow(clippy::needless_range_loop)] for index in start_at..=ends_at { let (func, major, minor, patch) = upgrade_list[index]; - (func)(&self.db_path)?; + (func)(&self.db_path, current_major, current_minor, current_patch)?; println!("Done"); // We're writing the version file just in case an issue arise _while_ upgrading. // We don't want the DB to fail in an unknown state. diff --git a/crates/meilitool/src/upgrade/v1_10.rs b/crates/meilitool/src/upgrade/v1_10.rs index 4a49ea471..a35fd4184 100644 --- a/crates/meilitool/src/upgrade/v1_10.rs +++ b/crates/meilitool/src/upgrade/v1_10.rs @@ -151,7 +151,12 @@ fn date_round_trip( Ok(()) } -pub fn v1_9_to_v1_10(db_path: &Path) -> anyhow::Result<()> { +pub fn v1_9_to_v1_10( + db_path: &Path, + _origin_major: &str, + _origin_minor: &str, + _origin_patch: &str, +) -> anyhow::Result<()> { println!("Upgrading from v1.9.0 to v1.10.0"); // 2 changes here diff --git a/crates/meilitool/src/upgrade/v1_11.rs b/crates/meilitool/src/upgrade/v1_11.rs index 92d853dd0..e24a35e8b 100644 --- a/crates/meilitool/src/upgrade/v1_11.rs +++ b/crates/meilitool/src/upgrade/v1_11.rs @@ -14,7 +14,12 @@ use meilisearch_types::milli::index::db_name; use crate::uuid_codec::UuidCodec; use crate::{try_opening_database, try_opening_poly_database}; -pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> { +pub fn v1_10_to_v1_11( + db_path: &Path, + _origin_major: &str, + _origin_minor: &str, + _origin_patch: &str, +) -> anyhow::Result<()> { 
println!("Upgrading from v1.10.0 to v1.11.0"); let index_scheduler_path = db_path.join("tasks"); diff --git a/crates/meilitool/src/upgrade/v1_12.rs b/crates/meilitool/src/upgrade/v1_12.rs index 444617375..593fb833c 100644 --- a/crates/meilitool/src/upgrade/v1_12.rs +++ b/crates/meilitool/src/upgrade/v1_12.rs @@ -1,17 +1,34 @@ //! The breaking changes that happened between the v1.11 and the v1.12 are: //! - The new indexer changed the update files format from OBKV to ndjson. https://github.com/meilisearch/meilisearch/pull/4900 +use std::borrow::Cow; use std::io::BufWriter; use std::path::Path; +use std::sync::atomic::AtomicBool; use anyhow::Context; use file_store::FileStore; use indexmap::IndexMap; use meilisearch_types::milli::documents::DocumentsBatchReader; +use meilisearch_types::milli::heed::types::{SerdeJson, Str}; +use meilisearch_types::milli::heed::{Database, EnvOpenOptions, RoTxn, RwTxn}; +use meilisearch_types::milli::progress::Step; +use meilisearch_types::milli::{FieldDistribution, Index}; +use serde::Serialize; use serde_json::value::RawValue; use tempfile::NamedTempFile; +use time::OffsetDateTime; +use uuid::Uuid; -pub fn v1_11_to_v1_12(db_path: &Path) -> anyhow::Result<()> { +use crate::try_opening_database; +use crate::uuid_codec::UuidCodec; + +pub fn v1_11_to_v1_12( + db_path: &Path, + _origin_major: &str, + _origin_minor: &str, + _origin_patch: &str, +) -> anyhow::Result<()> { println!("Upgrading from v1.11.0 to v1.12.0"); convert_update_files(db_path)?; @@ -19,6 +36,23 @@ pub fn v1_11_to_v1_12(db_path: &Path) -> anyhow::Result<()> { Ok(()) } +pub fn v1_12_to_v1_12_3( + db_path: &Path, + origin_major: &str, + origin_minor: &str, + origin_patch: &str, +) -> anyhow::Result<()> { + println!("Upgrading from v1.12.{{0, 1, 2}} to v1.12.3"); + + if origin_minor == "12" { + rebuild_field_distribution(db_path)?; + } else { + println!("Not rebuilding field distribution as it wasn't corrupted coming from v{origin_major}.{origin_minor}.{origin_patch}"); + } 
+ + Ok(()) +} + /// Convert the update files from OBKV to ndjson format. /// /// 1) List all the update files using the file store. @@ -77,3 +111,188 @@ fn convert_update_files(db_path: &Path) -> anyhow::Result<()> { Ok(()) } + +/// Rebuild field distribution as it was wrongly computed in v1.12.x if x < 3 +fn rebuild_field_distribution(db_path: &Path) -> anyhow::Result<()> { + let index_scheduler_path = db_path.join("tasks"); + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } + .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; + + let mut sched_wtxn = env.write_txn()?; + + let index_mapping: Database = + try_opening_database(&env, &sched_wtxn, "index-mapping")?; + let stats_db: Database> = + try_opening_database(&env, &sched_wtxn, "index-stats").with_context(|| { + format!("While trying to open {:?}", index_scheduler_path.display()) + })?; + + let index_count = + index_mapping.len(&sched_wtxn).context("while reading the number of indexes")?; + + // FIXME: not ideal, we have to pre-populate all indexes to prevent double borrow of sched_wtxn + // 1. immutably for the iteration + // 2. mutably for updating index stats + let indexes: Vec<_> = index_mapping + .iter(&sched_wtxn)? 
+ .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid))) + .collect(); + + let progress = meilisearch_types::milli::progress::Progress::default(); + let finished = AtomicBool::new(false); + + std::thread::scope(|scope| { + let display_progress = std::thread::Builder::new() + .name("display_progress".into()) + .spawn_scoped(scope, || { + while !finished.load(std::sync::atomic::Ordering::Relaxed) { + std::thread::sleep(std::time::Duration::from_secs(5)); + let view = progress.as_progress_view(); + let Ok(view) = serde_json::to_string(&view) else { + continue; + }; + println!("{view}"); + } + }) + .unwrap(); + + for (index_index, result) in indexes.into_iter().enumerate() { + let (uid, uuid) = result?; + progress.update_progress(VariableNameStep::new( + &uid, + index_index as u32, + index_count as u32, + )); + let index_path = db_path.join("indexes").join(uuid.to_string()); + + println!( + "[{}/{index_count}]Updating index `{uid}` at `{}`", + index_index + 1, + index_path.display() + ); + + println!("\t- Rebuilding field distribution"); + + let index = meilisearch_types::milli::Index::new(EnvOpenOptions::new(), &index_path) + .with_context(|| { + format!("while opening index {uid} at '{}'", index_path.display()) + })?; + + let mut index_txn = index.write_txn()?; + + meilisearch_types::milli::update::new::reindex::field_distribution( + &index, + &mut index_txn, + &progress, + ) + .context("while rebuilding field distribution")?; + + let stats = IndexStats::new(&index, &index_txn) + .with_context(|| format!("computing stats for index `{uid}`"))?; + store_stats_of(stats_db, uuid, &mut sched_wtxn, &uid, &stats)?; + + index_txn.commit().context("while committing the write txn for the updated index")?; + } + + sched_wtxn.commit().context("while committing the write txn for the index-scheduler")?; + + finished.store(true, std::sync::atomic::Ordering::Relaxed); + + if let Err(panic) = display_progress.join() { + let msg = match panic.downcast_ref::<&'static str>() { + 
Some(s) => *s, + None => match panic.downcast_ref::() { + Some(s) => &s[..], + None => "Box", + }, + }; + eprintln!("WARN: the display thread panicked with {msg}"); + } + + println!("Upgrading database succeeded"); + Ok(()) + }) +} + +pub struct VariableNameStep { + name: String, + current: u32, + total: u32, +} + +impl VariableNameStep { + pub fn new(name: impl Into, current: u32, total: u32) -> Self { + Self { name: name.into(), current, total } + } +} + +impl Step for VariableNameStep { + fn name(&self) -> Cow<'static, str> { + self.name.clone().into() + } + + fn current(&self) -> u32 { + self.current + } + + fn total(&self) -> u32 { + self.total + } +} + +pub fn store_stats_of( + stats_db: Database>, + index_uuid: Uuid, + sched_wtxn: &mut RwTxn, + index_uid: &str, + stats: &IndexStats, +) -> anyhow::Result<()> { + stats_db + .put(sched_wtxn, &index_uuid, stats) + .with_context(|| format!("storing stats for index `{index_uid}`"))?; + Ok(()) +} + +/// The statistics that can be computed from an `Index` object. +#[derive(Serialize, Debug)] +pub struct IndexStats { + /// Number of documents in the index. + pub number_of_documents: u64, + /// Size taken up by the index' DB, in bytes. + /// + /// This includes the size taken by both the used and free pages of the DB, and as the free pages + /// are not returned to the disk after a deletion, this number is typically larger than + /// `used_database_size` that only includes the size of the used pages. + pub database_size: u64, + /// Size taken by the used pages of the index' DB, in bytes. + /// + /// As the DB backend does not return to the disk the pages that are not currently used by the DB, + /// this value is typically smaller than `database_size`. + pub used_database_size: u64, + /// Association of every field name with the number of times it occurs in the documents. + pub field_distribution: FieldDistribution, + /// Creation date of the index. 
+ #[serde(with = "time::serde::rfc3339")] + pub created_at: OffsetDateTime, + /// Date of the last update of the index. + #[serde(with = "time::serde::rfc3339")] + pub updated_at: OffsetDateTime, +} + +impl IndexStats { + /// Compute the stats of an index + /// + /// # Parameters + /// + /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`. + pub fn new(index: &Index, rtxn: &RoTxn) -> meilisearch_types::milli::Result { + Ok(IndexStats { + number_of_documents: index.number_of_documents(rtxn)?, + database_size: index.on_disk_size()?, + used_database_size: index.used_size()?, + field_distribution: index.field_distribution(rtxn)?, + created_at: index.created_at(rtxn)?, + updated_at: index.updated_at(rtxn)?, + }) + } +} diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index 9f113e013..d22829045 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -15,68 +15,68 @@ license.workspace = true big_s = "1.0.2" bimap = { version = "0.6.3", features = ["serde"] } bincode = "1.3.3" -bstr = "1.9.1" -bytemuck = { version = "1.18.0", features = ["extern_crate_alloc"] } +bstr = "1.11.3" +bytemuck = { version = "1.21.0", features = ["extern_crate_alloc"] } byteorder = "1.5.0" charabia = { version = "0.9.2", default-features = false } concat-arrays = "0.1.2" -crossbeam-channel = "0.5.13" -deserr = "0.6.2" +crossbeam-channel = "0.5.14" +deserr = "0.6.3" either = { version = "1.13.0", features = ["serde"] } flatten-serde-json = { path = "../flatten-serde-json" } fst = "0.4.7" fxhash = "0.2.1" geoutils = "0.5.1" grenad = { version = "0.5.0", default-features = false, features = ["rayon", "tempfile"] } -heed = { version = "0.20.3", default-features = false, features = [ +heed = { version = "0.20.5", default-features = false, features = [ "serde-json", "serde-bincode", "read-txn-no-tls", ] } -indexmap = { version = "2.2.6", features = ["serde"] } +indexmap = { version = "2.7.0", features = ["serde"] } json-depth-checker = { path = 
"../json-depth-checker" } levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } -memchr = "2.5.0" -memmap2 = "0.9.4" +memchr = "2.7.4" +memmap2 = "0.9.5" obkv = "0.3.0" -once_cell = "1.19.0" -ordered-float = "4.2.1" +once_cell = "1.20.2" +ordered-float = "4.6.0" rayon = "1.10.0" -roaring = { version = "0.10.7", features = ["serde"] } -rstar = { version = "0.12.0", features = ["serde"] } -serde = { version = "1.0.204", features = ["derive"] } -serde_json = { version = "1.0.120", features = ["preserve_order", "raw_value"] } +roaring = { version = "0.10.10", features = ["serde"] } +rstar = { version = "0.12.2", features = ["serde"] } +serde = { version = "1.0.217", features = ["derive"] } +serde_json = { version = "1.0.135", features = ["preserve_order", "raw_value"] } slice-group-by = "0.3.1" smallstr = { version = "0.3.0", features = ["serde"] } smallvec = "1.13.2" smartstring = "1.0.1" -tempfile = "3.10.1" -thiserror = "1.0.61" -time = { version = "0.3.36", features = [ +tempfile = "3.15.0" +thiserror = "2.0.9" +time = { version = "0.3.37", features = [ "serde-well-known", "formatting", "parsing", "macros", ] } -uuid = { version = "1.10.0", features = ["v4"] } +uuid = { version = "1.11.0", features = ["v4"] } filter-parser = { path = "../filter-parser" } # documents words self-join -itertools = "0.13.0" +itertools = "0.14.0" -csv = "1.3.0" -candle-core = { version = "0.6.0" } -candle-transformers = { version = "0.6.0" } -candle-nn = { version = "0.6.0" } +csv = "1.3.1" +candle-core = { version = "0.8.2" } +candle-transformers = { version = "0.8.2" } +candle-nn = { version = "0.8.2" } tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [ "onig", ] } hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [ "online", ] } -tiktoken-rs = "0.5.9" -liquid = "0.26.6" +tiktoken-rs = "0.6.0" +liquid = 
"0.26.9" rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = [ "serde", "no_module", @@ -86,24 +86,26 @@ rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838 ] } arroy = "0.5.0" rand = "0.8.5" -tracing = "0.1.40" -ureq = { version = "2.10.0", features = ["json"] } -url = "2.5.2" +tracing = "0.1.41" +ureq = { version = "2.12.1", features = ["json"] } +url = "2.5.4" rayon-par-bridge = "0.1.0" -hashbrown = "0.15.0" +hashbrown = "0.15.2" bumpalo = "3.16.0" -bumparaw-collections = "0.1.2" +bumparaw-collections = "0.1.4" thread_local = "1.1.8" -allocator-api2 = "0.2.18" -rustc-hash = "2.0.0" +allocator-api2 = "0.2.21" +rustc-hash = "2.1.0" uell = "0.1.0" enum-iterator = "2.1.0" bbqueue = { git = "https://github.com/meilisearch/bbqueue" } flume = { version = "0.11.1", default-features = false } +utoipa = { version = "5.3.1", features = ["non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] } [dev-dependencies] mimalloc = { version = "0.1.43", default-features = false } -insta = "1.39.0" +# fixed version due to format breakages in v1.40 +insta = "=1.39.0" maplit = "1.0.2" md5 = "0.7.0" meili-snap = { path = "../meili-snap" } diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index f5f784ee0..c1b51f192 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -134,7 +134,7 @@ and can not be more than 511 bytes.", .document_id.to_string() InvalidVectorsEmbedderConf { document_id: String, error: String }, #[error("{0}")] InvalidFilter(String), - #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))] + #[error("Invalid type for filter subexpression: expected: {}, found: {}.", .0.join(", "), .1)] InvalidFilterExpression(&'static [&'static str], Value), #[error("Attribute `{}` is not sortable. 
{}", .field, diff --git a/crates/milli/src/lib.rs b/crates/milli/src/lib.rs index db44f745f..ea88d2b78 100644 --- a/crates/milli/src/lib.rs +++ b/crates/milli/src/lib.rs @@ -1,4 +1,3 @@ -#![cfg_attr(all(test, fuzzing), feature(no_coverage))] #![allow(clippy::type_complexity)] #[cfg(not(windows))] diff --git a/crates/milli/src/localized_attributes_rules.rs b/crates/milli/src/localized_attributes_rules.rs index 3c421ca6b..2b9bf099c 100644 --- a/crates/milli/src/localized_attributes_rules.rs +++ b/crates/milli/src/localized_attributes_rules.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use charabia::Language; use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; use crate::fields_ids_map::FieldsIdsMap; use crate::FieldId; @@ -14,9 +15,10 @@ use crate::FieldId; /// The pattern `attribute_name*` matches any attribute name that starts with `attribute_name`. /// The pattern `*attribute_name` matches any attribute name that ends with `attribute_name`. /// The pattern `*attribute_name*` matches any attribute name that contains `attribute_name`. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] pub struct LocalizedAttributesRule { pub attribute_patterns: Vec, + #[schema(value_type = Vec)] pub locales: Vec, } diff --git a/crates/milli/src/progress.rs b/crates/milli/src/progress.rs index accc2cf56..622ec9842 100644 --- a/crates/milli/src/progress.rs +++ b/crates/milli/src/progress.rs @@ -4,6 +4,7 @@ use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::{Arc, RwLock}; use serde::Serialize; +use utoipa::ToSchema; pub trait Step: 'static + Send + Sync { fn name(&self) -> Cow<'static, str>; @@ -136,15 +137,17 @@ macro_rules! 
make_atomic_progress { make_atomic_progress!(Document alias AtomicDocumentStep => "document" ); make_atomic_progress!(Payload alias AtomicPayloadStep => "payload" ); -#[derive(Debug, Serialize, Clone)] +#[derive(Debug, Serialize, Clone, ToSchema)] #[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] pub struct ProgressView { pub steps: Vec, pub percentage: f32, } -#[derive(Debug, Serialize, Clone)] +#[derive(Debug, Serialize, Clone, ToSchema)] #[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] pub struct ProgressStepView { pub current_step: Cow<'static, str>, pub finished: u32, diff --git a/crates/milli/src/search/facet/facet_range_search.rs b/crates/milli/src/search/facet/facet_range_search.rs index 0f8f58771..47e4defec 100644 --- a/crates/milli/src/search/facet/facet_range_search.rs +++ b/crates/milli/src/search/facet/facet_range_search.rs @@ -132,12 +132,12 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { /// /// 1. So long as the element's range is less than the left bound, we do nothing and keep iterating /// 2. If the element's range is fully contained by the bounds, then all of its docids are added to - /// the roaring bitmap. + /// the roaring bitmap. /// 3. If the element's range merely intersects the bounds, then we call the algorithm recursively - /// on the children of the element from the level below. + /// on the children of the element from the level below. /// 4. If the element's range is greater than the right bound, we do nothing and stop iterating. 
- /// Note that the right bound is found through either the `left_bound` of the *next* element, - /// or from the `rightmost_bound` argument + /// Note that the right bound is found through either the `left_bound` of the *next* element, + /// or from the `rightmost_bound` argument /// /// ## Arguments /// - `level`: the level being visited diff --git a/crates/milli/src/search/new/distinct.rs b/crates/milli/src/search/new/distinct.rs index d2ace6ffb..17859b6f8 100644 --- a/crates/milli/src/search/new/distinct.rs +++ b/crates/milli/src/search/new/distinct.rs @@ -18,10 +18,10 @@ pub struct DistinctOutput { /// Return a [`DistinctOutput`] containing: /// - `remaining`: a set of docids built such that exactly one element from `candidates` -/// is kept for each distinct value inside the given field. If the field does not exist, it -/// is considered unique. +/// is kept for each distinct value inside the given field. If the field does not exist, it +/// is considered unique. /// - `excluded`: the set of document ids that contain a value for the given field that occurs -/// in the given candidates. +/// in the given candidates. pub fn apply_distinct_rule( ctx: &mut SearchContext<'_>, field_id: u16, diff --git a/crates/milli/src/search/new/matches/matching_words.rs b/crates/milli/src/search/new/matches/matching_words.rs index 1f30a17ad..64235298b 100644 --- a/crates/milli/src/search/new/matches/matching_words.rs +++ b/crates/milli/src/search/new/matches/matching_words.rs @@ -149,7 +149,7 @@ pub type WordId = u16; /// A given token can partially match a query word for several reasons: /// - split words /// - multi-word synonyms -/// In these cases we need to match consecutively several tokens to consider that the match is full. +/// In these cases we need to match consecutively several tokens to consider that the match is full. 
#[derive(Debug, PartialEq)] pub enum MatchType<'a> { Full { char_count: usize, byte_len: usize, ids: &'a RangeInclusive }, diff --git a/crates/milli/src/search/new/matches/mod.rs b/crates/milli/src/search/new/matches/mod.rs index 19c1127cd..83d00caf0 100644 --- a/crates/milli/src/search/new/matches/mod.rs +++ b/crates/milli/src/search/new/matches/mod.rs @@ -13,6 +13,7 @@ use matching_words::{MatchType, PartialMatch}; use r#match::{Match, MatchPosition}; use serde::Serialize; use simple_token_kind::SimpleTokenKind; +use utoipa::ToSchema; const DEFAULT_CROP_MARKER: &str = "…"; const DEFAULT_HIGHLIGHT_PREFIX: &str = ""; @@ -100,7 +101,7 @@ impl FormatOptions { } } -#[derive(Serialize, Debug, Clone, PartialEq, Eq)] +#[derive(Serialize, Debug, Clone, PartialEq, Eq, ToSchema)] pub struct MatchBounds { pub start: usize, pub length: usize, diff --git a/crates/milli/src/search/new/query_graph.rs b/crates/milli/src/search/new/query_graph.rs index 9ab5d9dad..24cce039b 100644 --- a/crates/milli/src/search/new/query_graph.rs +++ b/crates/milli/src/search/new/query_graph.rs @@ -21,9 +21,9 @@ use crate::Result; /// 1. `Start` : unique, represents the start of the query /// 2. `End` : unique, represents the end of a query /// 3. `Deleted` : represents a node that was deleted. -/// All deleted nodes are unreachable from the start node. +/// All deleted nodes are unreachable from the start node. /// 4. `Term` is a regular node representing a word or combination of words -/// from the user query. +/// from the user query. #[derive(Clone)] pub struct QueryNode { pub data: QueryNodeData, diff --git a/crates/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/crates/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs index cacdcfa9f..65580bce5 100644 --- a/crates/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs +++ b/crates/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs @@ -8,7 +8,7 @@ with them, they are "unconditional". 
These kinds of edges are used to "skip" a n The algorithm uses a depth-first search. It benefits from two main optimisations: - The list of all possible costs to go from any node to the END node is precomputed - The `DeadEndsCache` reduces the number of valid paths drastically, by making some edges -untraversable depending on what other edges were selected. + untraversable depending on what other edges were selected. These two optimisations are meant to avoid traversing edges that wouldn't lead to a valid path. In practically all cases, we avoid the exponential complexity @@ -24,6 +24,7 @@ For example, the DeadEndsCache could say the following: - if we take `g`, then `[f]` is also forbidden - etc. - etc. + As we traverse the graph, we also traverse the `DeadEndsCache` and keep a list of forbidden conditions in memory. Then, we know to avoid all edges which have a condition that is forbidden. diff --git a/crates/milli/src/search/new/ranking_rule_graph/mod.rs b/crates/milli/src/search/new/ranking_rule_graph/mod.rs index 670402bcb..041cb99b8 100644 --- a/crates/milli/src/search/new/ranking_rule_graph/mod.rs +++ b/crates/milli/src/search/new/ranking_rule_graph/mod.rs @@ -58,7 +58,7 @@ pub struct ComputedCondition { /// 2. The cost of traversing this edge /// 3. The condition associated with it /// 4. The list of nodes that have to be skipped -/// if this edge is traversed. +/// if this edge is traversed. #[derive(Clone)] pub struct Edge { pub source_node: Interned, diff --git a/crates/milli/src/search/new/tests/exactness.rs b/crates/milli/src/search/new/tests/exactness.rs index c52006e3d..e1d6cc1ca 100644 --- a/crates/milli/src/search/new/tests/exactness.rs +++ b/crates/milli/src/search/new/tests/exactness.rs @@ -14,7 +14,7 @@ This module tests the following properties about the exactness ranking rule: 3. those that contain the most exact words from the remaining query - if it is followed by other graph-based ranking rules (`typo`, `proximity`, `attribute`). 
-Then these rules will only work with + Then these rules will only work with 1. the exact terms selected by `exactness 2. the full query term otherwise */ diff --git a/crates/milli/src/search/new/tests/proximity.rs b/crates/milli/src/search/new/tests/proximity.rs index 2d181a537..97c85a53d 100644 --- a/crates/milli/src/search/new/tests/proximity.rs +++ b/crates/milli/src/search/new/tests/proximity.rs @@ -4,15 +4,14 @@ This module tests the Proximity ranking rule: 1. A proximity of >7 always has the same cost. 2. Phrase terms can be in sprximity to other terms via their start and end words, -but we need to make sure that the phrase exists in the document that meets this -proximity condition. This is especially relevant with split words and synonyms. + but we need to make sure that the phrase exists in the document that meets this + proximity condition. This is especially relevant with split words and synonyms. 3. An ngram has the same sprximity cost as its component words being consecutive. -e.g. `sunflower` equivalent to `sun flower`. + e.g. `sunflower` equivalent to `sun flower`. 4. The prefix databases can be used to find the sprximity between two words, but -they store fewer sprximities than the regular word sprximity DB. - + they store fewer sprximities than the regular word sprximity DB. */ use std::collections::BTreeMap; diff --git a/crates/milli/src/search/new/tests/typo.rs b/crates/milli/src/search/new/tests/typo.rs index 61d4c4387..1bbe08977 100644 --- a/crates/milli/src/search/new/tests/typo.rs +++ b/crates/milli/src/search/new/tests/typo.rs @@ -11,7 +11,7 @@ This module tests the following properties: 8. 2grams can have 1 typo if they are larger than `min_word_len_two_typos` 9. 3grams are not typo tolerant (but they can be split into two words) 10. The `typo` ranking rule assumes the role of the `words` ranking rule implicitly -if `words` doesn't exist before it. + if `words` doesn't exist before it. 11. 
The `typo` ranking rule places documents with the same number of typos in the same bucket 12. Prefix tolerance costs nothing according to the typo ranking rule 13. Split words cost 1 typo according to the typo ranking rule diff --git a/crates/milli/src/search/new/tests/words_tms.rs b/crates/milli/src/search/new/tests/words_tms.rs index ee8cfc51b..e058d81ae 100644 --- a/crates/milli/src/search/new/tests/words_tms.rs +++ b/crates/milli/src/search/new/tests/words_tms.rs @@ -2,11 +2,11 @@ This module tests the following properties: 1. The `last` term matching strategy starts removing terms from the query -starting from the end if no more results match it. + starting from the end if no more results match it. 2. Phrases are never deleted by the `last` term matching strategy 3. Duplicate words don't affect the ranking of a document according to the `words` ranking rule 4. The proximity of the first and last word of a phrase to its adjacent terms is taken into -account by the proximity ranking rule. + account by the proximity ranking rule. 5. Unclosed double quotes still make a phrase 6. The `all` term matching strategy does not remove any term from the query 7. The search is capable of returning no results if no documents match the query diff --git a/crates/milli/src/update/facet/incremental.rs b/crates/milli/src/update/facet/incremental.rs index a1fa07fe3..fc869ad65 100644 --- a/crates/milli/src/update/facet/incremental.rs +++ b/crates/milli/src/update/facet/incremental.rs @@ -21,29 +21,30 @@ use crate::{CboRoaringBitmapCodec, Index, Result}; /// Enum used as a return value for the facet incremental indexing. /// /// - `ModificationResult::InPlace` means that modifying the `facet_value` into the `level` did not have -/// an effect on the number of keys in that level. Therefore, it did not increase the number of children -/// of the parent node. +/// an effect on the number of keys in that level. 
Therefore, it did not increase the number of children +/// of the parent node. /// /// - `ModificationResult::Insert` means that modifying the `facet_value` into the `level` resulted -/// in the addition of a new key in that level, and that therefore the number of children -/// of the parent node should be incremented. +/// in the addition of a new key in that level, and that therefore the number of children +/// of the parent node should be incremented. /// /// - `ModificationResult::Remove` means that modifying the `facet_value` into the `level` resulted in a change in the -/// number of keys in the level. For example, removing a document id from the facet value `3` could -/// cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted -/// entirely. In that case, `ModificationResult::Remove` is returned. The parent of the deleted key must -/// then adjust its group size. If its group size falls to 0, then it will need to be deleted as well. +/// number of keys in the level. For example, removing a document id from the facet value `3` could +/// cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted +/// entirely. In that case, `ModificationResult::Remove` is returned. The parent of the deleted key must +/// then adjust its group size. If its group size falls to 0, then it will need to be deleted as well. /// /// - `ModificationResult::Reduce/Expand` means that modifying the `facet_value` into the `level` resulted in a change in the -/// bounds of the keys of the level. For example, removing a document id from the facet value -/// `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore, -/// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4). -/// In that case `ModificationResult::Reduce` is returned. The parent of the reduced key may need to adjust -/// its left bound as well. 
+/// bounds of the keys of the level. For example, removing a document id from the facet value +/// `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore, +/// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4). +/// In that case `ModificationResult::Reduce` is returned. The parent of the reduced key may need to adjust +/// its left bound as well. /// /// - `ModificationResult::Nothing` means that modifying the `facet_value` didn't have any impact into the `level`. -/// This case is reachable when a document id is removed from a sub-level node but is still present in another one. -/// For example, removing `2` from a document containing `2` and `3`, the document id will removed form the `level 0` but should remain in the group node [1..4] in `level 1`. +/// This case is reachable when a document id is removed from a sub-level node but is still present in another one. +/// For example, removing `2` from a document containing `2` and `3`, the document id will be removed from the `level 0` +/// but should remain in the group node [1..4] in `level 1`. enum ModificationResult { InPlace, Expand, @@ -1059,208 +1060,3 @@ mod tests { milli_snap!(format!("{index}"), "after_delete"); } } - -// fuzz tests -#[cfg(all(test, fuzzing))] -/** -Fuzz test for the incremental indxer. - -The fuzz test uses fuzzcheck, a coverage-guided fuzzer. -See https://github.com/loiclec/fuzzcheck-rs and https://fuzzcheck.neocities.org -for more information. - -It is only run when using the `cargo fuzzcheck` command line tool, which can be installed with: -```sh -cargo install cargo-fuzzcheck -``` -To start the fuzz test, run (from the base folder or from milli/): -```sh -cargo fuzzcheck update::facet::incremental::fuzz::fuzz -``` -and wait a couple minutes to make sure the code was thoroughly tested, then -hit `Ctrl-C` to stop the fuzzer. The corpus generated by the fuzzer is located in milli/fuzz.
- -To work on this module with rust-analyzer working properly, add the following to your .cargo/config.toml file: -```toml -[build] -rustflags = ["--cfg", "fuzzing"] -``` - -The fuzz test generates sequences of additions and deletions to the facet database and -ensures that: -1. its structure is still internally valid -2. its content is the same as a trivially correct implementation of the same database -*/ -mod fuzz { - use std::collections::{BTreeMap, HashMap}; - use std::iter::FromIterator; - use std::rc::Rc; - - use fuzzcheck::mutators::integer::U8Mutator; - use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator}; - use fuzzcheck::mutators::vector::VecMutator; - use fuzzcheck::DefaultMutator; - use roaring::RoaringBitmap; - use tempfile::TempDir; - - use super::*; - use crate::update::facet::test_helpers::FacetIndex; - #[derive(Default)] - pub struct TrivialDatabase { - pub elements: BTreeMap>, - } - impl TrivialDatabase - where - T: Ord + Clone + Eq + std::fmt::Debug, - { - #[no_coverage] - pub fn insert(&mut self, field_id: u16, new_key: &T, new_values: &RoaringBitmap) { - if new_values.is_empty() { - return; - } - let values_field_id = self.elements.entry(field_id).or_default(); - let values = values_field_id.entry(new_key.clone()).or_default(); - *values |= new_values; - } - #[no_coverage] - pub fn delete(&mut self, field_id: u16, key: &T, values_to_remove: &RoaringBitmap) { - if let Some(values_field_id) = self.elements.get_mut(&field_id) { - if let Some(values) = values_field_id.get_mut(&key) { - *values -= values_to_remove; - if values.is_empty() { - values_field_id.remove(&key); - } - } - if values_field_id.is_empty() { - self.elements.remove(&field_id); - } - } - } - } - #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)] - struct Operation { - #[field_mutator(VecMutator = { VecMutator::new(u8::default_mutator(), 0 ..= 5) })] - key: Vec, - #[field_mutator(U8WithinRangeMutator = { 
U8WithinRangeMutator::new(..32) })] - group_size: u8, - #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })] - max_group_size: u8, - #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })] - min_level_size: u8, - #[field_mutator(U16WithinRangeMutator = { U16WithinRangeMutator::new(..=3) })] - field_id: u16, - kind: OperationKind, - } - #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)] - enum OperationKind { - Insert( - #[field_mutator(VecMutator = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })] - Vec, - ), - Delete( - #[field_mutator(VecMutator = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })] - Vec, - ), - } - - #[no_coverage] - fn compare_with_trivial_database(tempdir: Rc, operations: &[Operation]) { - let index = FacetIndex::::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten - let mut txn = index.env.write_txn().unwrap(); - - let mut trivial_db = TrivialDatabase::>::default(); - let mut value_to_keys = HashMap::>>::new(); - for Operation { key, group_size, max_group_size, min_level_size, field_id, kind } in - operations - { - index.set_group_size(*group_size); - index.set_max_group_size(*max_group_size); - index.set_min_level_size(*min_level_size); - match kind { - OperationKind::Insert(values) => { - let mut bitmap = RoaringBitmap::new(); - for value in values { - bitmap.insert(*value as u32); - value_to_keys.entry(*value).or_default().push(key.clone()); - } - index.insert(&mut txn, *field_id, &key.as_slice(), &bitmap); - trivial_db.insert(*field_id, &key, &bitmap); - } - OperationKind::Delete(values) => { - let values = RoaringBitmap::from_iter(values.iter().copied().map(|x| x as u32)); - let mut values_per_key = HashMap::new(); - - for value in values { - if let Some(keys) = value_to_keys.get(&(value as u8)) { - for key in keys { - let values: &mut RoaringBitmap = - values_per_key.entry(key).or_default(); - values.insert(value); - } - } - } - for (key, 
values) in values_per_key { - index.delete(&mut txn, *field_id, &key.as_slice(), &values); - trivial_db.delete(*field_id, &key, &values); - } - } - } - } - - for (field_id, values_field_id) in trivial_db.elements.iter() { - let level0iter = index - .content - .as_polymorph() - .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&mut txn, &field_id.to_be_bytes()) - .unwrap(); - - for ((key, values), group) in values_field_id.iter().zip(level0iter) { - let (group_key, group_values) = group.unwrap(); - let group_key = - FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); - assert_eq!(key, &group_key.left_bound); - assert_eq!(values, &group_values.bitmap); - } - } - - for (field_id, values_field_id) in trivial_db.elements.iter() { - let level0iter = index - .content - .as_polymorph() - .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes()) - .unwrap(); - - for ((key, values), group) in values_field_id.iter().zip(level0iter) { - let (group_key, group_values) = group.unwrap(); - let group_key = - FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); - assert_eq!(key, &group_key.left_bound); - assert_eq!(values, &group_values.bitmap); - } - index.verify_structure_validity(&txn, *field_id); - } - txn.abort().unwrap(); - } - - #[test] - #[no_coverage] - fn fuzz() { - let tempdir = Rc::new(TempDir::new().unwrap()); - let tempdir_cloned = tempdir.clone(); - let result = fuzzcheck::fuzz_test(move |operations: &[Operation]| { - compare_with_trivial_database(tempdir_cloned.clone(), operations) - }) - .default_mutator() - .serde_serializer() - .default_sensor_and_pool_with_custom_filter(|file, function| { - file == std::path::Path::new("milli/src/update/facet/incremental.rs") - && !function.contains("serde") - && !function.contains("tests::") - && !function.contains("fuzz::") - && !function.contains("display_bitmap") - }) - .arguments_from_cargo_fuzzcheck() - .launch(); - assert!(!result.found_test_failure); - } -} diff --git 
a/crates/milli/src/update/facet/mod.rs b/crates/milli/src/update/facet/mod.rs index 911296577..dbacf6248 100644 --- a/crates/milli/src/update/facet/mod.rs +++ b/crates/milli/src/update/facet/mod.rs @@ -79,22 +79,29 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5; use std::collections::BTreeSet; use std::fs::File; use std::io::BufReader; +use std::ops::Bound; use grenad::Merger; use heed::types::{Bytes, DecodeIgnore}; +use heed::BytesDecode as _; +use roaring::RoaringBitmap; use time::OffsetDateTime; use tracing::debug; use self::incremental::FacetsUpdateIncremental; use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps}; use crate::facet::FacetType; -use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; +use crate::heed_codec::facet::{ + FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec, +}; use crate::heed_codec::BytesRefCodec; +use crate::search::facet::get_highest_level; use crate::update::del_add::{DelAdd, KvReaderDelAdd}; use crate::{try_split_array_at, FieldId, Index, Result}; pub mod bulk; pub mod incremental; +pub mod new_incremental; /// A builder used to add new elements to the `facet_id_string_docids` or `facet_id_f64_docids` databases. 
/// @@ -346,35 +353,6 @@ pub(crate) mod test_helpers { for<'a> BoundCodec: BytesEncode<'a> + BytesDecode<'a, DItem = >::EItem>, { - #[cfg(all(test, fuzzing))] - pub fn open_from_tempdir( - tempdir: Rc, - group_size: u8, - max_group_size: u8, - min_level_size: u8, - ) -> FacetIndex { - let group_size = std::cmp::min(16, std::cmp::max(group_size, 2)); // 2 <= x <= 16 - let max_group_size = std::cmp::min(16, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 16 - let min_level_size = std::cmp::min(17, std::cmp::max(1, min_level_size)); // 1 <= x <= 17 - - let mut options = heed::EnvOpenOptions::new(); - let options = options.map_size(4096 * 4 * 10 * 1000); - unsafe { - options.flag(heed::flags::Flags::MdbAlwaysFreePages); - } - let env = options.open(tempdir.path()).unwrap(); - let content = env.open_database(None).unwrap().unwrap(); - - FacetIndex { - content, - group_size: Cell::new(group_size), - max_group_size: Cell::new(max_group_size), - min_level_size: Cell::new(min_level_size), - _tempdir: tempdir, - env, - _phantom: PhantomData, - } - } pub fn new( group_size: u8, max_group_size: u8, @@ -402,26 +380,6 @@ pub(crate) mod test_helpers { } } - #[cfg(all(test, fuzzing))] - pub fn set_group_size(&self, group_size: u8) { - // 2 <= x <= 64 - self.group_size.set(std::cmp::min(64, std::cmp::max(group_size, 2))); - } - #[cfg(all(test, fuzzing))] - pub fn set_max_group_size(&self, max_group_size: u8) { - // 2*group_size <= x <= 128 - let max_group_size = std::cmp::max(4, std::cmp::min(128, max_group_size)); - self.max_group_size.set(max_group_size); - if self.group_size.get() < max_group_size / 2 { - self.group_size.set(max_group_size / 2); - } - } - #[cfg(all(test, fuzzing))] - pub fn set_min_level_size(&self, min_level_size: u8) { - // 1 <= x <= inf - self.min_level_size.set(std::cmp::max(1, min_level_size)); - } - pub fn insert<'a>( &self, wtxn: &'a mut RwTxn<'_>, @@ -646,3 +604,194 @@ mod comparison_bench { } } } + +/// Run sanity checks on the 
specified fid tree +/// +/// 1. No "orphan" child value, any child value has a parent +/// 2. Any docid in the child appears in the parent +/// 3. No docid in the parent is missing from all its children +/// 4. no group is bigger than max_group_size +/// 5. Less than 50% of groups are bigger than group_size +/// 6. group size matches the number of children +/// 7. max_level is < 255 +pub(crate) fn sanity_checks( + index: &Index, + rtxn: &heed::RoTxn, + field_id: FieldId, + facet_type: FacetType, + group_size: usize, + _min_level_size: usize, // might add a check on level size later + max_group_size: usize, +) -> Result<()> { + tracing::info!(%field_id, ?facet_type, "performing sanity checks"); + let database = match facet_type { + FacetType::String => { + index.facet_id_string_docids.remap_key_type::>() + } + FacetType::Number => { + index.facet_id_f64_docids.remap_key_type::>() + } + }; + + let leaf_prefix: FacetGroupKey<&[u8]> = FacetGroupKey { field_id, level: 0, left_bound: &[] }; + + let leaf_it = database.prefix_iter(rtxn, &leaf_prefix)?; + + let max_level = get_highest_level(rtxn, database, field_id)?; + if max_level == u8::MAX { + panic!("max_level == 255"); + } + + for leaf in leaf_it { + let (leaf_facet_value, leaf_docids) = leaf?; + let mut current_level = 0; + + let mut current_parent_facet_value: Option> = None; + let mut current_parent_docids: Option = None; + loop { + current_level += 1; + if current_level >= max_level { + break; + } + let parent_key_right_bound = FacetGroupKey { + field_id, + level: current_level, + left_bound: leaf_facet_value.left_bound, + }; + let (parent_facet_value, parent_docids) = database + .get_lower_than_or_equal_to(rtxn, &parent_key_right_bound)? 
+ .expect("no parent found"); + if parent_facet_value.level != current_level { + panic!( + "wrong parent level, found_level={}, expected_level={}", + parent_facet_value.level, current_level + ); + } + if parent_facet_value.field_id != field_id { + panic!("wrong parent fid"); + } + if parent_facet_value.left_bound > leaf_facet_value.left_bound { + panic!("wrong parent left bound"); + } + + if !leaf_docids.bitmap.is_subset(&parent_docids.bitmap) { + panic!( + "missing docids from leaf in parent, current_level={}, parent={}, child={}, missing={missing:?}, child_len={}, child={:?}", + current_level, + facet_to_string(parent_facet_value.left_bound, facet_type), + facet_to_string(leaf_facet_value.left_bound, facet_type), + leaf_docids.bitmap.len(), + leaf_docids.bitmap.clone(), + missing=leaf_docids.bitmap - parent_docids.bitmap, + ) + } + + if let Some(current_parent_facet_value) = current_parent_facet_value { + if current_parent_facet_value.field_id != parent_facet_value.field_id { + panic!("wrong parent parent fid"); + } + if current_parent_facet_value.level + 1 != parent_facet_value.level { + panic!("wrong parent parent level"); + } + if current_parent_facet_value.left_bound < parent_facet_value.left_bound { + panic!("wrong parent parent left bound"); + } + } + + if let Some(current_parent_docids) = current_parent_docids { + if !current_parent_docids.bitmap.is_subset(&parent_docids.bitmap) { + panic!("missing docids from intermediate node in parent, parent_level={}, parent={}, intermediate={}, missing={missing:?}, intermediate={:?}", + parent_facet_value.level, + facet_to_string(parent_facet_value.left_bound, facet_type), + facet_to_string(current_parent_facet_value.unwrap().left_bound, facet_type), + current_parent_docids.bitmap.clone(), + missing=current_parent_docids.bitmap - parent_docids.bitmap, + ); + } + } + + current_parent_facet_value = Some(parent_facet_value); + current_parent_docids = Some(parent_docids); + } + } + tracing::info!(%field_id, ?facet_type, 
"checked all leaves"); + + let mut current_level = max_level; + let mut greater_than_group = 0usize; + let mut total = 0usize; + loop { + if current_level == 0 { + break; + } + let child_level = current_level - 1; + tracing::info!(%field_id, ?facet_type, %current_level, "checked groups for level"); + let level_groups_prefix: FacetGroupKey<&[u8]> = + FacetGroupKey { field_id, level: current_level, left_bound: &[] }; + let mut level_groups_it = database.prefix_iter(rtxn, &level_groups_prefix)?.peekable(); + + 'group_it: loop { + let Some(group) = level_groups_it.next() else { break 'group_it }; + + let (group_facet_value, group_docids) = group?; + let child_left_bound = group_facet_value.left_bound.to_owned(); + let mut expected_docids = RoaringBitmap::new(); + let mut expected_size = 0usize; + let right_bound = level_groups_it + .peek() + .and_then(|res| res.as_ref().ok()) + .map(|(key, _)| key.left_bound); + let child_left_bound = FacetGroupKey { + field_id, + level: child_level, + left_bound: child_left_bound.as_slice(), + }; + let child_left_bound = Bound::Included(&child_left_bound); + let child_right_bound; + let child_right_bound = if let Some(right_bound) = right_bound { + child_right_bound = + FacetGroupKey { field_id, level: child_level, left_bound: right_bound }; + Bound::Excluded(&child_right_bound) + } else { + Bound::Unbounded + }; + let children = database.range(rtxn, &(child_left_bound, child_right_bound))?; + for child in children { + let (child_facet_value, child_docids) = child?; + if child_facet_value.field_id != field_id { + break; + } + if child_facet_value.level != child_level { + break; + } + expected_size += 1; + expected_docids |= &child_docids.bitmap; + } + assert_eq!(expected_size, group_docids.size as usize); + assert!(expected_size <= max_group_size); + assert_eq!(expected_docids, group_docids.bitmap); + total += 1; + if expected_size > group_size { + greater_than_group += 1; + } + } + + current_level -= 1; + } + if greater_than_group * 
2 > total { + panic!("too many groups have a size > group_size"); + } + + tracing::info!("sanity checks OK"); + + Ok(()) +} + +fn facet_to_string(facet_value: &[u8], facet_type: FacetType) -> String { + match facet_type { + FacetType::String => bstr::BStr::new(facet_value).to_string(), + FacetType::Number => match OrderedF64Codec::bytes_decode(facet_value) { + Ok(value) => value.to_string(), + Err(e) => format!("error: {e} (bytes: {facet_value:?})"), // undecodable number: show the raw bytes instead + }, + } +} diff --git a/crates/milli/src/update/facet/new_incremental.rs b/crates/milli/src/update/facet/new_incremental.rs new file mode 100644 index 000000000..0890f8593 --- /dev/null +++ b/crates/milli/src/update/facet/new_incremental.rs @@ -0,0 +1,498 @@ +use std::ops::Bound; + +use heed::types::{Bytes, DecodeIgnore}; +use heed::{BytesDecode as _, Database, RwTxn}; +use roaring::RoaringBitmap; + +use crate::facet::FacetType; +use crate::heed_codec::facet::{ + FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, +}; +use crate::heed_codec::BytesRefCodec; +use crate::search::facet::get_highest_level; +use crate::update::valid_facet_value; +use crate::{FieldId, Index, Result}; + +pub struct FacetsUpdateIncremental { + inner: FacetsUpdateIncrementalInner, + delta_data: Vec, +} + +struct FacetsUpdateIncrementalInner { + db: Database, FacetGroupValueCodec>, + field_id: FieldId, + group_size: u8, + min_level_size: u8, + max_group_size: u8, +} + +impl FacetsUpdateIncremental { + pub fn new( + index: &Index, + facet_type: FacetType, + field_id: FieldId, + delta_data: Vec, + group_size: u8, + min_level_size: u8, + max_group_size: u8, + ) -> Self { + FacetsUpdateIncremental { + inner: FacetsUpdateIncrementalInner { + db: match facet_type { + FacetType::String => index + .facet_id_string_docids + .remap_key_type::>(), + FacetType::Number => index + .facet_id_f64_docids + .remap_key_type::>(), + }, + field_id, + group_size, + min_level_size, + max_group_size, + }, + + delta_data, + } + } + +
#[tracing::instrument(level = "trace", skip_all, target = "indexing::facets::incremental")] + pub fn execute(mut self, wtxn: &mut RwTxn) -> Result<()> { + if self.delta_data.is_empty() { + return Ok(()); + } + self.delta_data.sort_unstable_by( + |FacetFieldIdChange { facet_value: left, .. }, + FacetFieldIdChange { facet_value: right, .. }| { + left.cmp(right) + // sort in **reverse** lexicographic order + .reverse() + }, + ); + + self.inner.find_changed_parents(wtxn, self.delta_data)?; + + self.inner.add_or_delete_level(wtxn) + } +} + +impl FacetsUpdateIncrementalInner { + /// WARNING: `changed_children` must be sorted in **reverse** lexicographic order. + fn find_changed_parents( + &self, + wtxn: &mut RwTxn, + mut changed_children: Vec, + ) -> Result<()> { + let mut changed_parents = vec![]; + for child_level in 0u8..u8::MAX { + // child_level < u8::MAX by construction + let parent_level = child_level + 1; + let parent_level_left_bound: FacetGroupKey<&[u8]> = + FacetGroupKey { field_id: self.field_id, level: parent_level, left_bound: &[] }; + + let mut last_parent: Option> = None; + let mut child_it = changed_children + // drain all changed children + .drain(..) + // keep only children whose value is valid in the LMDB sense + .filter(|child| valid_facet_value(&child.facet_value)); + // `while let` rather than `for` because we advance `child_it` inside of the loop + 'current_level: while let Some(child) = child_it.next() { + if let Some(last_parent) = &last_parent { + if &child.facet_value >= last_parent { + self.compute_parent_group(wtxn, child_level, child.facet_value)?; + continue 'current_level; + } + } + + // need to find a new parent + let parent_key_prefix = FacetGroupKey { + field_id: self.field_id, + level: parent_level, + left_bound: &*child.facet_value, + }; + + let parent = self + .db + .remap_data_type::() + .rev_range( + wtxn, + &( + Bound::Excluded(&parent_level_left_bound), + Bound::Included(&parent_key_prefix), + ), + )? 
+ .next(); + + match parent { + Some(Ok((parent_key, _parent_value))) => { + // found parent, cache it for next keys + last_parent = Some(parent_key.left_bound.to_owned().into_boxed_slice()); + + // add to modified list for parent level + changed_parents.push(FacetFieldIdChange { + facet_value: parent_key.left_bound.to_owned().into_boxed_slice(), + }); + self.compute_parent_group(wtxn, child_level, child.facet_value)?; + } + Some(Err(err)) => return Err(err.into()), + None => { + // no parent for that key + let mut parent_it = self + .db + .remap_data_type::() + .prefix_iter_mut(wtxn, &parent_level_left_bound)?; + match parent_it.next() { + // 1. left of the current left bound, or + Some(Ok((first_key, _first_value))) => { + // make sure we don't spill on the neighboring fid (level also included defensively) + if first_key.field_id != self.field_id + || first_key.level != parent_level + { + // max level reached, exit + drop(parent_it); + self.compute_parent_group( + wtxn, + child_level, + child.facet_value, + )?; + for child in child_it.by_ref() { + self.compute_parent_group( + wtxn, + child_level, + child.facet_value, + )?; + } + return Ok(()); + } + // remove old left bound + unsafe { parent_it.del_current()? }; + drop(parent_it); + changed_parents.push(FacetFieldIdChange { + facet_value: child.facet_value.clone(), + }); + self.compute_parent_group(wtxn, child_level, child.facet_value)?; + // pop all elements in order to visit the new left bound + let new_left_bound = + &mut changed_parents.last_mut().unwrap().facet_value; + for child in child_it.by_ref() { + new_left_bound.clone_from(&child.facet_value); + + self.compute_parent_group( + wtxn, + child_level, + child.facet_value, + )?; + } + } + Some(Err(err)) => return Err(err.into()), + // 2. 
max level reached, exit + None => { + drop(parent_it); + self.compute_parent_group(wtxn, child_level, child.facet_value)?; + for child in child_it.by_ref() { + self.compute_parent_group( + wtxn, + child_level, + child.facet_value, + )?; + } + return Ok(()); + } + } + } + } + } + if changed_parents.is_empty() { + return Ok(()); + } + drop(child_it); + std::mem::swap(&mut changed_children, &mut changed_parents); + // changed_parents is now empty because changed_children was emptied by the drain + } + Ok(()) + } + + fn compute_parent_group( + &self, + wtxn: &mut RwTxn<'_>, + parent_level: u8, + parent_left_bound: Box<[u8]>, + ) -> Result<()> { + let mut range_left_bound: Vec = parent_left_bound.into(); + if parent_level == 0 { + return Ok(()); + } + let child_level = parent_level - 1; + + let parent_key = FacetGroupKey { + field_id: self.field_id, + level: parent_level, + left_bound: &*range_left_bound, + }; + let child_right_bound = self + .db + .remap_data_type::() + .get_greater_than(wtxn, &parent_key)? + .and_then( + |( + FacetGroupKey { + level: right_level, + field_id: right_fid, + left_bound: right_bound, + }, + _, + )| { + if parent_level != right_level || self.field_id != right_fid { + // there was a greater key, but with a greater level or fid, so not a sibling to the parent: ignore + return None; + } + Some(right_bound.to_owned()) + }, + ); + let child_right_bound = match &child_right_bound { + Some(right_bound) => Bound::Excluded(FacetGroupKey { + left_bound: right_bound.as_slice(), + field_id: self.field_id, + level: child_level, + }), + None => Bound::Unbounded, + }; + + let child_left_key = FacetGroupKey { + field_id: self.field_id, + level: child_level, + left_bound: &*range_left_bound, + }; + let mut child_left_bound = Bound::Included(child_left_key); + + loop { + // do a first pass on the range to find the number of children + let child_count = self + .db + .remap_data_type::() + .range(wtxn, &(child_left_bound, child_right_bound))? 
+ .take(self.max_group_size as usize * 2) + .count(); + let mut child_it = self.db.range(wtxn, &(child_left_bound, child_right_bound))?; + + // pick the right group_size depending on the number of children + let group_size = if child_count >= self.max_group_size as usize * 2 { + // more than twice the max_group_size => there will be space for at least 2 groups of max_group_size + self.max_group_size as usize + } else if child_count >= self.group_size as usize { + // size in [group_size, max_group_size * 2[ + // divided by 2 it is between [group_size / 2, max_group_size[ + // this ensures that the tree is balanced + child_count / 2 + } else { + // take everything + child_count + }; + + let res: Result<_> = child_it + .by_ref() + .take(group_size) + // stop if we go to the next level or field id + .take_while(|res| match res { + Ok((child_key, _)) => { + child_key.field_id == self.field_id && child_key.level == child_level + } + Err(_) => true, + }) + .try_fold( + (None, FacetGroupValue { size: 0, bitmap: Default::default() }), + |(bounds, mut group_value), child_res| { + let (child_key, child_value) = child_res?; + let bounds = match bounds { + Some((left_bound, _)) => Some((left_bound, child_key.left_bound)), + None => Some((child_key.left_bound, child_key.left_bound)), + }; + // max_group_size <= u8::MAX + group_value.size += 1; + group_value.bitmap |= &child_value.bitmap; + Ok((bounds, group_value)) + }, + ); + + let (bounds, group_value) = res?; + + let Some((group_left_bound, right_bound)) = bounds else { + let update_key = FacetGroupKey { + field_id: self.field_id, + level: parent_level, + left_bound: &*range_left_bound, + }; + drop(child_it); + if let Bound::Included(_) = child_left_bound { + self.db.delete(wtxn, &update_key)?; + } + + break; + }; + + drop(child_it); + let current_left_bound = group_left_bound.to_owned(); + + let delete_old_bound = match child_left_bound { + Bound::Included(bound) => { + if bound.left_bound != current_left_bound { + 
Some(range_left_bound.clone()) + } else { + None + } + } + _ => None, + }; + + range_left_bound.clear(); + range_left_bound.extend_from_slice(right_bound); + let child_left_key = FacetGroupKey { + field_id: self.field_id, + level: child_level, + left_bound: range_left_bound.as_slice(), + }; + child_left_bound = Bound::Excluded(child_left_key); + + if let Some(old_bound) = delete_old_bound { + let update_key = FacetGroupKey { + field_id: self.field_id, + level: parent_level, + left_bound: old_bound.as_slice(), + }; + self.db.delete(wtxn, &update_key)?; + } + + let update_key = FacetGroupKey { + field_id: self.field_id, + level: parent_level, + left_bound: current_left_bound.as_slice(), + }; + if group_value.bitmap.is_empty() { + self.db.delete(wtxn, &update_key)?; + } else { + self.db.put(wtxn, &update_key, &group_value)?; + } + } + + Ok(()) + } + + /// Check whether the highest level has exceeded `min_level_size` * `self.group_size`. + /// If it has, we must build an addition level above it. + /// Then check whether the highest level is under `min_level_size`. + /// If it has, we must remove the complete level. + pub(crate) fn add_or_delete_level(&self, txn: &mut RwTxn<'_>) -> Result<()> { + let highest_level = get_highest_level(txn, self.db, self.field_id)?; + let mut highest_level_prefix = vec![]; + highest_level_prefix.extend_from_slice(&self.field_id.to_be_bytes()); + highest_level_prefix.push(highest_level); + + let size_highest_level = + self.db.remap_types::().prefix_iter(txn, &highest_level_prefix)?.count(); + + if size_highest_level >= self.group_size as usize * self.min_level_size as usize { + self.add_level(txn, highest_level, &highest_level_prefix, size_highest_level) + } else if size_highest_level < self.min_level_size as usize && highest_level != 0 { + self.delete_level(txn, &highest_level_prefix) + } else { + Ok(()) + } + } + + /// Delete a level. 
+ fn delete_level(&self, txn: &mut RwTxn<'_>, highest_level_prefix: &[u8]) -> Result<()> { + let mut to_delete = vec![]; + let mut iter = + self.db.remap_types::().prefix_iter(txn, highest_level_prefix)?; + for el in iter.by_ref() { + let (k, _) = el?; + to_delete.push( + FacetGroupKeyCodec::::bytes_decode(k) + .map_err(heed::Error::Encoding)? + .into_owned(), + ); + } + drop(iter); + for k in to_delete { + self.db.delete(txn, &k.as_ref())?; + } + Ok(()) + } + + /// Build an additional level for the field id. + fn add_level( + &self, + txn: &mut RwTxn<'_>, + highest_level: u8, + highest_level_prefix: &[u8], + size_highest_level: usize, + ) -> Result<()> { + let mut groups_iter = self + .db + .remap_types::() + .prefix_iter(txn, highest_level_prefix)?; + + let nbr_new_groups = size_highest_level / self.group_size as usize; + let nbr_leftover_elements = size_highest_level % self.group_size as usize; + + let mut to_add = vec![]; + for _ in 0..nbr_new_groups { + let mut first_key = None; + let mut values = RoaringBitmap::new(); + for _ in 0..self.group_size { + let (key_bytes, value_i) = groups_iter.next().unwrap()?; + let key_i = FacetGroupKeyCodec::::bytes_decode(key_bytes) + .map_err(heed::Error::Encoding)?; + + if first_key.is_none() { + first_key = Some(key_i); + } + values |= value_i.bitmap; + } + let key = FacetGroupKey { + field_id: self.field_id, + level: highest_level + 1, + left_bound: first_key.unwrap().left_bound, + }; + let value = FacetGroupValue { size: self.group_size, bitmap: values }; + to_add.push((key.into_owned(), value)); + } + // now we add the rest of the level, in case its size is > group_size * min_level_size + // this can indeed happen if the min_level_size parameter changes between two calls to `insert` + if nbr_leftover_elements > 0 { + let mut first_key = None; + let mut values = RoaringBitmap::new(); + for _ in 0..nbr_leftover_elements { + let (key_bytes, value_i) = groups_iter.next().unwrap()?; + let key_i = 
FacetGroupKeyCodec::::bytes_decode(key_bytes) + .map_err(heed::Error::Encoding)?; + + if first_key.is_none() { + first_key = Some(key_i); + } + values |= value_i.bitmap; + } + let key = FacetGroupKey { + field_id: self.field_id, + level: highest_level + 1, + left_bound: first_key.unwrap().left_bound, + }; + // Note: nbr_leftover_elements can be casted to a u8 since it is bounded by `max_group_size` + // when it is created above. + let value = FacetGroupValue { size: nbr_leftover_elements as u8, bitmap: values }; + to_add.push((key.into_owned(), value)); + } + + drop(groups_iter); + for (key, value) in to_add { + self.db.put(txn, &key.as_ref(), &value)?; + } + Ok(()) + } +} + +#[derive(Debug)] +pub struct FacetFieldIdChange { + pub facet_value: Box<[u8]>, +} diff --git a/crates/milli/src/update/index_documents/helpers/mod.rs b/crates/milli/src/update/index_documents/helpers/mod.rs index c188e324d..5dec54ffc 100644 --- a/crates/milli/src/update/index_documents/helpers/mod.rs +++ b/crates/milli/src/update/index_documents/helpers/mod.rs @@ -10,10 +10,14 @@ use fst::{IntoStreamer, Streamer}; pub use grenad_helpers::*; pub use merge_functions::*; -use crate::MAX_WORD_LENGTH; +use crate::MAX_LMDB_KEY_LENGTH; pub fn valid_lmdb_key(key: impl AsRef<[u8]>) -> bool { - key.as_ref().len() <= MAX_WORD_LENGTH * 2 && !key.as_ref().is_empty() + key.as_ref().len() <= MAX_LMDB_KEY_LENGTH - 3 && !key.as_ref().is_empty() +} + +pub fn valid_facet_value(facet_value: impl AsRef<[u8]>) -> bool { + facet_value.as_ref().len() <= MAX_LMDB_KEY_LENGTH - 3 && !facet_value.as_ref().is_empty() } /// Divides one slice into two at an index, returns `None` if mid is out of bounds. 
diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index d416c1a2b..154db7875 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -2093,33 +2093,6 @@ mod tests { index.add_documents(doc1).unwrap(); } - #[cfg(feature = "default")] - #[test] - fn store_detected_script_and_language_per_document_during_indexing() { - use charabia::{Language, Script}; - let index = TempIndex::new(); - index - .add_documents(documents!([ - { "id": 1, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" }, - { "id": 2, "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" }, - { "id": 3, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }, - { "id": 4, "title": "関西国際空港限定トートバッグ すもももももももものうち" }, - { "id": 5, "title": "ภาษาไทยง่ายนิดเดียว" }, - { "id": 6, "title": "The quick 在尊嚴和權利上一律平等。" }, - ])) - .unwrap(); - - let rtxn = index.read_txn().unwrap(); - let key_jpn = (Script::Cj, Language::Jpn); - let key_cmn = (Script::Cj, Language::Cmn); - let cj_jpn_docs = index.script_language_documents_ids(&rtxn, &key_jpn).unwrap().unwrap(); - let cj_cmn_docs = index.script_language_documents_ids(&rtxn, &key_cmn).unwrap().unwrap(); - let expected_cj_jpn_docids = [3].iter().collect(); - assert_eq!(cj_jpn_docs, expected_cj_jpn_docids); - let expected_cj_cmn_docids = [1, 5].iter().collect(); - assert_eq!(cj_cmn_docs, expected_cj_cmn_docids); - } - #[test] fn add_and_delete_documents_in_single_transform() { let mut index = TempIndex::new(); @@ -3335,6 +3308,44 @@ mod tests { rtxn.commit().unwrap(); } + #[test] + fn incremental_update_without_changing_facet_distribution() { + let index = TempIndex::new(); + index + .add_documents(documents!([ + {"id": 0, "some_field": "aaa", "other_field": "aaa" }, + {"id": 1, "some_field": "bbb", "other_field": "bbb" }, + ])) + .unwrap(); + { + let rtxn = 
index.read_txn().unwrap(); + // count field distribution + let results = index.field_distribution(&rtxn).unwrap(); + assert_eq!(Some(&2), results.get("id")); + assert_eq!(Some(&2), results.get("some_field")); + assert_eq!(Some(&2), results.get("other_field")); + } + + let mut index = index; + index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments; + + index + .add_documents(documents!([ + {"id": 0, "other_field": "bbb" }, + {"id": 1, "some_field": "ccc" }, + ])) + .unwrap(); + + { + let rtxn = index.read_txn().unwrap(); + // count field distribution + let results = index.field_distribution(&rtxn).unwrap(); + assert_eq!(Some(&2), results.get("id")); + assert_eq!(Some(&2), results.get("some_field")); + assert_eq!(Some(&2), results.get("other_field")); + } + } + #[test] fn delete_words_exact_attributes() { let index = TempIndex::new(); diff --git a/crates/milli/src/update/index_documents/snapshots/mod.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/facet_id_f64_docids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/facet_id_f64_docids.snap index c45c350e7..7ab60b90d 100644 --- a/crates/milli/src/update/index_documents/snapshots/mod.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/facet_id_f64_docids.snap +++ b/crates/milli/src/update/index_documents/snapshots/mod.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/facet_id_f64_docids.snap @@ -1,5 +1,5 @@ --- -source: milli/src/update/index_documents/mod.rs +source: crates/milli/src/update/index_documents/mod.rs --- 3 0 48.9021 1 [19, ] 3 0 49.9314 1 [17, ] @@ -15,6 +15,11 @@ source: milli/src/update/index_documents/mod.rs 3 0 50.7453 1 [7, ] 3 0 50.8466 1 [10, ] 3 0 51.0537 1 [9, ] +3 1 48.9021 2 [17, 19, ] +3 1 50.1793 3 [13, 14, 15, ] +3 1 50.4502 4 [0, 3, 8, 12, ] +3 1 50.6312 2 [1, 2, ] +3 1 50.7453 3 [7, 9, 10, ] 4 0 2.271 1 [17, ] 4 0 
2.3708 1 [19, ] 4 0 2.7637 1 [14, ] @@ -28,4 +33,3 @@ source: milli/src/update/index_documents/mod.rs 4 0 3.6957 1 [9, ] 4 0 3.9623 1 [12, ] 4 0 4.337 1 [10, ] - diff --git a/crates/milli/src/update/new/extract/cache.rs b/crates/milli/src/update/new/extract/cache.rs index e2c8bb5fe..47bca6193 100644 --- a/crates/milli/src/update/new/extract/cache.rs +++ b/crates/milli/src/update/new/extract/cache.rs @@ -679,9 +679,7 @@ impl DelAddRoaringBitmap { let del = self.del.get_or_insert_with(RoaringBitmap::new); let mut iter = bbbul.iter_and_clear(); while let Some(block) = iter.next_block() { - let iter = block.iter().copied(); - let block = RoaringBitmap::from_sorted_iter(iter).unwrap(); - *del |= block; + del.extend(block); } } @@ -689,9 +687,7 @@ impl DelAddRoaringBitmap { let add = self.add.get_or_insert_with(RoaringBitmap::new); let mut iter = bbbul.iter_and_clear(); while let Some(block) = iter.next_block() { - let iter = block.iter().copied(); - let block = RoaringBitmap::from_sorted_iter(iter).unwrap(); - *add |= block; + add.extend(block); } } } diff --git a/crates/milli/src/update/new/extract/documents.rs b/crates/milli/src/update/new/extract/documents.rs index 832e8c463..01041af42 100644 --- a/crates/milli/src/update/new/extract/documents.rs +++ b/crates/milli/src/update/new/extract/documents.rs @@ -94,7 +94,8 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> { .or_default(); *entry -= 1; } - let content = update.updated(); + let content = + update.merged(&context.rtxn, context.index, &context.db_fields_ids_map)?; let geo_iter = content .geo_field() .transpose() diff --git a/crates/milli/src/update/new/extract/faceted/extract_facets.rs b/crates/milli/src/update/new/extract/faceted/extract_facets.rs index 66ed6cbfb..7e0484e39 100644 --- a/crates/milli/src/update/new/extract/faceted/extract_facets.rs +++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs @@ -28,7 +28,7 @@ use crate::{DocumentId, FieldId, Index, Result, 
MAX_FACET_VALUE_LENGTH}; pub struct FacetedExtractorData<'a, 'b> { attributes_to_extract: &'a [&'a str], sender: &'a FieldIdDocidFacetSender<'a, 'b>, - grenad_parameters: GrenadParameters, + grenad_parameters: &'a GrenadParameters, buckets: usize, } @@ -374,7 +374,6 @@ fn truncate_str(s: &str) -> &str { impl FacetedDocidsExtractor { #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")] pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( - grenad_parameters: GrenadParameters, document_changes: &DC, indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, extractor_allocs: &'extractor mut ThreadLocal>, @@ -398,7 +397,7 @@ impl FacetedDocidsExtractor { let extractor = FacetedExtractorData { attributes_to_extract: &attributes_to_extract, - grenad_parameters, + grenad_parameters: indexing_context.grenad_parameters, buckets: rayon::current_num_threads(), sender, }; diff --git a/crates/milli/src/update/new/extract/mod.rs b/crates/milli/src/update/new/extract/mod.rs index 4bcb918e4..aa0a3d333 100644 --- a/crates/milli/src/update/new/extract/mod.rs +++ b/crates/milli/src/update/new/extract/mod.rs @@ -18,12 +18,10 @@ pub use vectors::EmbeddingExtractor; use super::indexer::document_changes::{DocumentChanges, IndexingContext}; use super::steps::IndexingStep; use super::thread_local::{FullySend, ThreadLocal}; -use crate::update::GrenadParameters; use crate::Result; pub trait DocidsExtractor { fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( - grenad_parameters: GrenadParameters, document_changes: &DC, indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, extractor_allocs: &'extractor mut ThreadLocal>, diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs index 952ee91e4..49259cd64 100644 --- 
a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs +++ b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs @@ -208,7 +208,7 @@ impl<'extractor> WordDocidsCaches<'extractor> { pub struct WordDocidsExtractorData<'a> { tokenizer: &'a DocumentTokenizer<'a>, - grenad_parameters: GrenadParameters, + grenad_parameters: &'a GrenadParameters, buckets: usize, } @@ -240,7 +240,6 @@ pub struct WordDocidsExtractors; impl WordDocidsExtractors { pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( - grenad_parameters: GrenadParameters, document_changes: &DC, indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, extractor_allocs: &'extractor mut ThreadLocal>, @@ -288,7 +287,7 @@ impl WordDocidsExtractors { let extractor = WordDocidsExtractorData { tokenizer: &document_tokenizer, - grenad_parameters, + grenad_parameters: indexing_context.grenad_parameters, buckets: rayon::current_num_threads(), }; diff --git a/crates/milli/src/update/new/extract/searchable/mod.rs b/crates/milli/src/update/new/extract/searchable/mod.rs index c4240196a..7c949a3ce 100644 --- a/crates/milli/src/update/new/extract/searchable/mod.rs +++ b/crates/milli/src/update/new/extract/searchable/mod.rs @@ -24,7 +24,7 @@ use crate::{Index, Result, MAX_POSITION_PER_ATTRIBUTE}; pub struct SearchableExtractorData<'a, EX: SearchableExtractor> { tokenizer: &'a DocumentTokenizer<'a>, - grenad_parameters: GrenadParameters, + grenad_parameters: &'a GrenadParameters, buckets: usize, _ex: PhantomData, } @@ -57,7 +57,6 @@ impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor> pub trait SearchableExtractor: Sized + Sync { fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( - grenad_parameters: GrenadParameters, document_changes: &DC, indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, extractor_allocs: &'extractor mut ThreadLocal>, @@ -96,7 +95,7 @@ pub trait 
SearchableExtractor: Sized + Sync { let extractor_data: SearchableExtractorData = SearchableExtractorData { tokenizer: &document_tokenizer, - grenad_parameters, + grenad_parameters: indexing_context.grenad_parameters, buckets: rayon::current_num_threads(), _ex: PhantomData, }; @@ -134,7 +133,6 @@ pub trait SearchableExtractor: Sized + Sync { impl DocidsExtractor for T { fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( - grenad_parameters: GrenadParameters, document_changes: &DC, indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, extractor_allocs: &'extractor mut ThreadLocal>, @@ -143,12 +141,6 @@ impl DocidsExtractor for T { where MSP: Fn() -> bool + Sync, { - Self::run_extraction( - grenad_parameters, - document_changes, - indexing_context, - extractor_allocs, - step, - ) + Self::run_extraction(document_changes, indexing_context, extractor_allocs, step) } } diff --git a/crates/milli/src/update/new/indexer/document_changes.rs b/crates/milli/src/update/new/indexer/document_changes.rs index a45fcee85..f77ac7658 100644 --- a/crates/milli/src/update/new/indexer/document_changes.rs +++ b/crates/milli/src/update/new/indexer/document_changes.rs @@ -12,6 +12,7 @@ use crate::progress::{AtomicDocumentStep, Progress}; use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _; use crate::update::new::steps::IndexingStep; use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal}; +use crate::update::GrenadParameters; use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result}; pub struct DocumentChangeContext< @@ -145,6 +146,7 @@ pub struct IndexingContext< pub fields_ids_map_store: &'indexer ThreadLocal>>>, pub must_stop_processing: &'indexer MSP, pub progress: &'indexer Progress, + pub grenad_parameters: &'indexer GrenadParameters, } impl< @@ -207,6 +209,7 @@ pub fn extract< fields_ids_map_store, must_stop_processing, progress, + grenad_parameters: _, }: IndexingContext<'fid, 
'indexer, 'index, MSP>, extractor_allocs: &'extractor mut ThreadLocal>, datastore: &'data ThreadLocal, diff --git a/crates/milli/src/update/new/indexer/document_deletion.rs b/crates/milli/src/update/new/indexer/document_deletion.rs index b42a6c859..03f763f18 100644 --- a/crates/milli/src/update/new/indexer/document_deletion.rs +++ b/crates/milli/src/update/new/indexer/document_deletion.rs @@ -166,6 +166,7 @@ mod test { fields_ids_map_store: &fields_ids_map_store, must_stop_processing: &(|| false), progress: &Progress::default(), + grenad_parameters: &Default::default(), }; for _ in 0..3 { diff --git a/crates/milli/src/update/new/indexer/document_operation.rs b/crates/milli/src/update/new/indexer/document_operation.rs index 090c1eb8e..8216742ec 100644 --- a/crates/milli/src/update/new/indexer/document_operation.rs +++ b/crates/milli/src/update/new/indexer/document_operation.rs @@ -13,7 +13,7 @@ use serde_json::Deserializer; use super::super::document_change::DocumentChange; use super::document_changes::{DocumentChangeContext, DocumentChanges}; -use super::retrieve_or_guess_primary_key; +use super::guess_primary_key::retrieve_or_guess_primary_key; use crate::documents::PrimaryKey; use crate::progress::{AtomicPayloadStep, Progress}; use crate::update::new::document::Versions; @@ -252,6 +252,24 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>( previous_offset = iter.byte_offset(); } + if payload.is_empty() { + let result = retrieve_or_guess_primary_key( + rtxn, + index, + new_fields_ids_map, + primary_key_from_op, + None, + ); + match result { + Ok(Ok((pk, _))) => { + primary_key.get_or_insert(pk); + } + Ok(Err(UserError::NoPrimaryKeyCandidateFound)) => (), + Ok(Err(user_error)) => return Err(Error::UserError(user_error)), + Err(error) => return Err(error), + }; + } + Ok(new_docids_version_offsets) } diff --git a/crates/milli/src/update/new/indexer/extract.rs b/crates/milli/src/update/new/indexer/extract.rs new file mode 100644 index 000000000..63536c559 --- 
/dev/null +++ b/crates/milli/src/update/new/indexer/extract.rs @@ -0,0 +1,310 @@ +use std::collections::BTreeMap; +use std::sync::atomic::AtomicBool; +use std::sync::OnceLock; + +use bumpalo::Bump; +use roaring::RoaringBitmap; +use tracing::Span; + +use super::super::channel::*; +use super::super::extract::*; +use super::super::steps::IndexingStep; +use super::super::thread_local::{FullySend, ThreadLocal}; +use super::super::FacetFieldIdsDelta; +use super::document_changes::{extract, DocumentChanges, IndexingContext}; +use crate::index::IndexEmbeddingConfig; +use crate::proximity::ProximityPrecision; +use crate::update::new::extract::EmbeddingExtractor; +use crate::update::new::merger::merge_and_send_rtree; +use crate::update::new::{merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases}; +use crate::vector::EmbeddingConfigs; +use crate::{Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder}; + +#[allow(clippy::too_many_arguments)] +pub(super) fn extract_all<'pl, 'extractor, DC, MSP>( + document_changes: &DC, + indexing_context: IndexingContext, + indexer_span: Span, + extractor_sender: ExtractorBbqueueSender, + embedders: &EmbeddingConfigs, + extractor_allocs: &'extractor mut ThreadLocal>, + finished_extraction: &AtomicBool, + field_distribution: &mut BTreeMap, + mut index_embeddings: Vec, + document_ids: &mut RoaringBitmap, +) -> Result<(FacetFieldIdsDelta, Vec)> +where + DC: DocumentChanges<'pl>, + MSP: Fn() -> bool + Sync, +{ + let span = + tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract"); + let _entered = span.enter(); + + let index = indexing_context.index; + let rtxn = index.read_txn()?; + + // document but we need to create a function that collects and compresses documents. 
+ let document_sender = extractor_sender.documents(); + let document_extractor = DocumentsExtractor::new(document_sender, embedders); + let datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); + { + let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents"); + let _entered = span.enter(); + extract( + document_changes, + &document_extractor, + indexing_context, + extractor_allocs, + &datastore, + IndexingStep::ExtractingDocuments, + )?; + } + { + let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "documents"); + let _entered = span.enter(); + for document_extractor_data in datastore { + let document_extractor_data = document_extractor_data.0.into_inner(); + for (field, delta) in document_extractor_data.field_distribution_delta { + let current = field_distribution.entry(field).or_default(); + // adding the delta should never cause a negative result, as we are removing fields that previously existed. + *current = current.saturating_add_signed(delta); + } + document_extractor_data.docids_delta.apply_to(document_ids); + } + + field_distribution.retain(|_, v| *v != 0); + } + + let facet_field_ids_delta; + + { + let caches = { + let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "faceted"); + let _entered = span.enter(); + + FacetedDocidsExtractor::run_extraction( + document_changes, + indexing_context, + extractor_allocs, + &extractor_sender.field_id_docid_facet_sender(), + IndexingStep::ExtractingFacets, + )? 
+ }; + + { + let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted"); + let _entered = span.enter(); + + facet_field_ids_delta = merge_and_send_facet_docids( + caches, + FacetDatabases::new(index), + index, + &rtxn, + extractor_sender.facet_docids(), + )?; + } + } + + { + let WordDocidsCaches { + word_docids, + word_fid_docids, + exact_word_docids, + word_position_docids, + fid_word_count_docids, + } = { + let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids"); + let _entered = span.enter(); + + WordDocidsExtractors::run_extraction( + document_changes, + indexing_context, + extractor_allocs, + IndexingStep::ExtractingWords, + )? + }; + + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids"); + let _entered = span.enter(); + merge_and_send_docids( + word_docids, + index.word_docids.remap_types(), + index, + extractor_sender.docids::(), + &indexing_context.must_stop_processing, + )?; + } + + { + let span = + tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids"); + let _entered = span.enter(); + merge_and_send_docids( + word_fid_docids, + index.word_fid_docids.remap_types(), + index, + extractor_sender.docids::(), + &indexing_context.must_stop_processing, + )?; + } + + { + let span = + tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids"); + let _entered = span.enter(); + merge_and_send_docids( + exact_word_docids, + index.exact_word_docids.remap_types(), + index, + extractor_sender.docids::(), + &indexing_context.must_stop_processing, + )?; + } + + { + let span = + tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids"); + let _entered = span.enter(); + merge_and_send_docids( + word_position_docids, + index.word_position_docids.remap_types(), + index, + extractor_sender.docids::(), + &indexing_context.must_stop_processing, + )?; + } + + { + let span = + 
tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids"); + let _entered = span.enter(); + merge_and_send_docids( + fid_word_count_docids, + index.field_id_word_count_docids.remap_types(), + index, + extractor_sender.docids::(), + &indexing_context.must_stop_processing, + )?; + } + } + + // run the proximity extraction only if the precision is by word + // this works only if the settings didn't change during this transaction. + let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default(); + if proximity_precision == ProximityPrecision::ByWord { + let caches = { + let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids"); + let _entered = span.enter(); + + ::run_extraction( + document_changes, + indexing_context, + extractor_allocs, + IndexingStep::ExtractingWordProximity, + )? + }; + + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids"); + let _entered = span.enter(); + + merge_and_send_docids( + caches, + index.word_pair_proximity_docids.remap_types(), + index, + extractor_sender.docids::(), + &indexing_context.must_stop_processing, + )?; + } + } + + 'vectors: { + if index_embeddings.is_empty() { + break 'vectors; + } + + let embedding_sender = extractor_sender.embeddings(); + let extractor = EmbeddingExtractor::new( + embedders, + embedding_sender, + field_distribution, + request_threads(), + ); + let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors"); + let _entered = span.enter(); + + extract( + document_changes, + &extractor, + indexing_context, + extractor_allocs, + &datastore, + IndexingStep::ExtractingEmbeddings, + )?; + } + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors"); + let _entered = span.enter(); + + for config in &mut index_embeddings { + 'data: for data in 
datastore.iter_mut() { + let data = &mut data.get_mut().0; + let Some(deladd) = data.remove(&config.name) else { + continue 'data; + }; + deladd.apply_to(&mut config.user_provided); + } + } + } + } + + 'geo: { + let Some(extractor) = GeoExtractor::new(&rtxn, index, *indexing_context.grenad_parameters)? + else { + break 'geo; + }; + let datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); + + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "geo"); + let _entered = span.enter(); + + extract( + document_changes, + &extractor, + indexing_context, + extractor_allocs, + &datastore, + IndexingStep::WritingGeoPoints, + )?; + } + + merge_and_send_rtree( + datastore, + &rtxn, + index, + extractor_sender.geo(), + &indexing_context.must_stop_processing, + )?; + } + indexing_context.progress.update_progress(IndexingStep::WritingToDatabase); + finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed); + + Result::Ok((facet_field_ids_delta, index_embeddings)) +} + +fn request_threads() -> &'static ThreadPoolNoAbort { + static REQUEST_THREADS: OnceLock = OnceLock::new(); + + REQUEST_THREADS.get_or_init(|| { + ThreadPoolNoAbortBuilder::new() + .num_threads(crate::vector::REQUEST_PARALLELISM) + .thread_name(|index| format!("embedding-request-{index}")) + .build() + .unwrap() + }) +} diff --git a/crates/milli/src/update/new/indexer/guess_primary_key.rs b/crates/milli/src/update/new/indexer/guess_primary_key.rs new file mode 100644 index 000000000..f0eb82b8d --- /dev/null +++ b/crates/milli/src/update/new/indexer/guess_primary_key.rs @@ -0,0 +1,85 @@ +use bumparaw_collections::RawMap; +use heed::RoTxn; +use rustc_hash::FxBuildHasher; + +use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY}; +use crate::update::new::StdResult; +use crate::{FieldsIdsMap, Index, Result, UserError}; + +/// Returns the primary key that has already been set for this index or the +/// one we will guess by searching for the only key whose lowercased name ends with 
"id", +/// and whether the primary key changed +pub fn retrieve_or_guess_primary_key<'a>( + rtxn: &'a RoTxn<'a>, + index: &Index, + new_fields_ids_map: &mut FieldsIdsMap, + primary_key_from_op: Option<&'a str>, + first_document: Option>, +) -> Result, bool), UserError>> { + // make sure that we have a declared primary key, either fetching it from the index or attempting to guess it. + + // do we have an existing declared primary key? + let (primary_key, has_changed) = if let Some(primary_key_from_db) = index.primary_key(rtxn)? { + // did we request a primary key in the operation? + match primary_key_from_op { + // we did, and it is different from the DB one + Some(primary_key_from_op) if primary_key_from_op != primary_key_from_db => { + return Ok(Err(UserError::PrimaryKeyCannotBeChanged( + primary_key_from_db.to_string(), + ))); + } + _ => (primary_key_from_db, false), + } + } else { + // no primary key in the DB => let's set one + // did we request a primary key in the operation? + let primary_key = if let Some(primary_key_from_op) = primary_key_from_op { + // set primary key from operation + primary_key_from_op + } else { + // guess primary key + let first_document = match first_document { + Some(document) => document, + // previous indexer when no pk is set + we send an empty payload => index_primary_key_no_candidate_found + None => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)), + }; + + let guesses: Result> = first_document + .keys() + .filter_map(|name| { + let Some(_) = new_fields_ids_map.insert(name) else { + return Some(Err(UserError::AttributeLimitReached.into())); + }; + name.to_lowercase().ends_with(DEFAULT_PRIMARY_KEY).then_some(Ok(name)) + }) + .collect(); + + let mut guesses = guesses?; + + // sort the keys in lexicographical order, so that fields are always in the same order. 
+ guesses.sort_unstable(); + + match guesses.as_slice() { + [] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)), + [name] => { + tracing::info!("Primary key was not specified in index. Inferred to '{name}'"); + *name + } + multiple => { + return Ok(Err(UserError::MultiplePrimaryKeyCandidatesFound { + candidates: multiple + .iter() + .map(|candidate| candidate.to_string()) + .collect(), + })) + } + } + }; + (primary_key, true) + }; + + match PrimaryKey::new_or_insert(primary_key, new_fields_ids_map) { + Ok(primary_key) => Ok(Ok((primary_key, has_changed))), + Err(err) => Ok(Err(err)), + } +} diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index a850c0d03..1cf83f2d2 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -1,59 +1,37 @@ -use std::cmp::Ordering; use std::sync::atomic::AtomicBool; -use std::sync::{OnceLock, RwLock}; +use std::sync::RwLock; use std::thread::{self, Builder}; use big_s::S; -use bumparaw_collections::RawMap; -use document_changes::{extract, DocumentChanges, IndexingContext}; +use document_changes::{DocumentChanges, IndexingContext}; pub use document_deletion::DocumentDeletion; pub use document_operation::{DocumentOperation, PayloadStats}; use hashbrown::HashMap; -use heed::types::{Bytes, DecodeIgnore, Str}; -use heed::{RoTxn, RwTxn}; -use itertools::{merge_join_by, EitherOrBoth}; +use heed::RwTxn; pub use partial_dump::PartialDump; -use rand::SeedableRng as _; -use rustc_hash::FxBuildHasher; -use time::OffsetDateTime; pub use update_by_function::UpdateByFunction; +use write::{build_vectors, update_index, write_to_db}; use super::channel::*; -use super::extract::*; -use super::facet_search_builder::FacetSearchBuilder; -use super::merger::FacetFieldIdsDelta; use super::steps::IndexingStep; use super::thread_local::ThreadLocal; -use super::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder}; -use 
super::words_prefix_docids::{ - compute_word_prefix_docids, compute_word_prefix_fid_docids, compute_word_prefix_position_docids, -}; -use super::StdResult; -use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY}; -use crate::facet::FacetType; +use crate::documents::PrimaryKey; use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; -use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY}; use crate::progress::Progress; -use crate::proximity::ProximityPrecision; -use crate::update::del_add::DelAdd; -use crate::update::new::extract::EmbeddingExtractor; -use crate::update::new::merger::merge_and_send_rtree; -use crate::update::new::words_prefix_docids::compute_exact_word_prefix_docids; -use crate::update::new::{merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases}; -use crate::update::settings::InnerIndexSettings; -use crate::update::{FacetsUpdateBulk, GrenadParameters}; -use crate::vector::{ArroyWrapper, EmbeddingConfigs, Embeddings}; -use crate::{ - Error, FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort, - ThreadPoolNoAbortBuilder, UserError, -}; +use crate::update::GrenadParameters; +use crate::vector::{ArroyWrapper, EmbeddingConfigs}; +use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort}; pub(crate) mod de; pub mod document_changes; mod document_deletion; mod document_operation; +mod extract; +mod guess_primary_key; mod partial_dump; +mod post_processing; mod update_by_function; +mod write; /// This is the main function of this crate. 
/// @@ -107,7 +85,7 @@ where }, ); - let (extractor_sender, mut writer_receiver) = pool + let (extractor_sender, writer_receiver) = pool .install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000)) .unwrap(); @@ -126,9 +104,10 @@ where fields_ids_map_store: &fields_ids_map_store, must_stop_processing, progress, + grenad_parameters: &grenad_parameters, }; - let mut index_embeddings = index.embedding_configs(wtxn)?; + let index_embeddings = index.embedding_configs(wtxn)?; let mut field_distribution = index.field_distribution(wtxn)?; let mut document_ids = index.documents_ids(wtxn)?; @@ -139,261 +118,28 @@ where // prevent moving the field_distribution and document_ids in the inner closure... let field_distribution = &mut field_distribution; let document_ids = &mut document_ids; - let extractor_handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || { - pool.install(move || { - let span = tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract"); - let _entered = span.enter(); - - let rtxn = index.read_txn()?; - - // document but we need to create a function that collects and compresses documents. 
- let document_sender = extractor_sender.documents(); - let document_extractor = DocumentsExtractor::new(document_sender, embedders); - let datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); - { - let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents"); - let _entered = span.enter(); - extract( + let extractor_handle = + Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || { + pool.install(move || { + extract::extract_all( document_changes, - &document_extractor, indexing_context, + indexer_span, + extractor_sender, + embedders, &mut extractor_allocs, - &datastore, - IndexingStep::ExtractingDocuments, - )?; - } - { - let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "documents"); - let _entered = span.enter(); - for document_extractor_data in datastore { - let document_extractor_data = document_extractor_data.0.into_inner(); - for (field, delta) in document_extractor_data.field_distribution_delta { - let current = field_distribution.entry(field).or_default(); - // adding the delta should never cause a negative result, as we are removing fields that previously existed. - *current = current.saturating_add_signed(delta); - } - document_extractor_data.docids_delta.apply_to(document_ids); - } - - field_distribution.retain(|_, v| *v != 0); - } - - let facet_field_ids_delta; - - { - let caches = { - let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "faceted"); - let _entered = span.enter(); - - FacetedDocidsExtractor::run_extraction( - grenad_parameters, - document_changes, - indexing_context, - &mut extractor_allocs, - &extractor_sender.field_id_docid_facet_sender(), - IndexingStep::ExtractingFacets - )? 
- }; - - { - let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted"); - let _entered = span.enter(); - - facet_field_ids_delta = merge_and_send_facet_docids( - caches, - FacetDatabases::new(index), - index, - extractor_sender.facet_docids(), - )?; - } - } - - { - let WordDocidsCaches { - word_docids, - word_fid_docids, - exact_word_docids, - word_position_docids, - fid_word_count_docids, - } = { - let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids"); - let _entered = span.enter(); - - WordDocidsExtractors::run_extraction( - grenad_parameters, - document_changes, - indexing_context, - &mut extractor_allocs, - IndexingStep::ExtractingWords - )? - }; - - { - let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids"); - let _entered = span.enter(); - merge_and_send_docids( - word_docids, - index.word_docids.remap_types(), - index, - extractor_sender.docids::(), - &indexing_context.must_stop_processing, - )?; - } - - { - let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids"); - let _entered = span.enter(); - merge_and_send_docids( - word_fid_docids, - index.word_fid_docids.remap_types(), - index, - extractor_sender.docids::(), - &indexing_context.must_stop_processing, - )?; - } - - { - let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids"); - let _entered = span.enter(); - merge_and_send_docids( - exact_word_docids, - index.exact_word_docids.remap_types(), - index, - extractor_sender.docids::(), - &indexing_context.must_stop_processing, - )?; - } - - { - let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids"); - let _entered = span.enter(); - merge_and_send_docids( - word_position_docids, - index.word_position_docids.remap_types(), - index, - extractor_sender.docids::(), - &indexing_context.must_stop_processing, - )?; - } - - { - let span = 
tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids"); - let _entered = span.enter(); - merge_and_send_docids( - fid_word_count_docids, - index.field_id_word_count_docids.remap_types(), - index, - extractor_sender.docids::(), - &indexing_context.must_stop_processing, - )?; - } - } - - // run the proximity extraction only if the precision is by word - // this works only if the settings didn't change during this transaction. - let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default(); - if proximity_precision == ProximityPrecision::ByWord { - let caches = { - let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids"); - let _entered = span.enter(); - - ::run_extraction( - grenad_parameters, - document_changes, - indexing_context, - &mut extractor_allocs, - IndexingStep::ExtractingWordProximity, - )? - }; - - { - let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids"); - let _entered = span.enter(); - - merge_and_send_docids( - caches, - index.word_pair_proximity_docids.remap_types(), - index, - extractor_sender.docids::(), - &indexing_context.must_stop_processing, - )?; - } - } - - 'vectors: { - if index_embeddings.is_empty() { - break 'vectors; - } - - let embedding_sender = extractor_sender.embeddings(); - let extractor = EmbeddingExtractor::new(embedders, embedding_sender, field_distribution, request_threads()); - let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); - { - let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors"); - let _entered = span.enter(); - - extract( - document_changes, - &extractor, - indexing_context, - &mut extractor_allocs, - &datastore, - IndexingStep::ExtractingEmbeddings, - )?; - } - { - let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors"); - let _entered = span.enter(); - - for config in &mut index_embeddings { - 'data: 
for data in datastore.iter_mut() { - let data = &mut data.get_mut().0; - let Some(deladd) = data.remove(&config.name) else { continue 'data; }; - deladd.apply_to(&mut config.user_provided); - } - } - } - } - - 'geo: { - let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else { - break 'geo; - }; - let datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); - - { - let span = tracing::trace_span!(target: "indexing::documents::extract", "geo"); - let _entered = span.enter(); - - extract( - document_changes, - &extractor, - indexing_context, - &mut extractor_allocs, - &datastore, - IndexingStep::WritingGeoPoints - )?; - } - - merge_and_send_rtree( - datastore, - &rtxn, - index, - extractor_sender.geo(), - &indexing_context.must_stop_processing, - )?; - } - indexing_context.progress.update_progress(IndexingStep::WritingToDatabase); - finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed); - - Result::Ok((facet_field_ids_delta, index_embeddings)) - }).unwrap() - })?; + finished_extraction, + field_distribution, + index_embeddings, + document_ids, + ) + }) + .unwrap() + })?; let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map); let vector_arroy = index.vector_arroy; - let indexer_span = tracing::Span::current(); let arroy_writers: Result> = embedders .inner_as_ref() .iter() @@ -415,114 +161,30 @@ where }) .collect(); - // Used by by the ArroySetVector to copy the embedding into an - // aligned memory area, required by arroy to accept a new vector. 
- let mut aligned_embedding = Vec::new(); let mut arroy_writers = arroy_writers?; - { - let span = tracing::trace_span!(target: "indexing::write_db", "all"); - let _entered = span.enter(); - - let span = tracing::trace_span!(target: "indexing::write_db", "post_merge"); - let mut _entered_post_merge = None; - - while let Some(action) = writer_receiver.recv_action() { - if _entered_post_merge.is_none() - && finished_extraction.load(std::sync::atomic::Ordering::Relaxed) - { - _entered_post_merge = Some(span.enter()); - } - - match action { - ReceiverAction::WakeUp => (), - ReceiverAction::LargeEntry(LargeEntry { database, key, value }) => { - let database_name = database.database_name(); - let database = database.database(index); - if let Err(error) = database.put(wtxn, &key, &value) { - return Err(Error::InternalError(InternalError::StorePut { - database_name, - key: bstr::BString::from(&key[..]), - value_length: value.len(), - error, - })); - } - } - ReceiverAction::LargeVectors(large_vectors) => { - let LargeVectors { docid, embedder_id, .. } = large_vectors; - let (_, _, writer, dimensions) = - arroy_writers.get(&embedder_id).expect("requested a missing embedder"); - let mut embeddings = Embeddings::new(*dimensions); - for embedding in large_vectors.read_embeddings(*dimensions) { - embeddings.push(embedding.to_vec()).unwrap(); - } - writer.del_items(wtxn, *dimensions, docid)?; - writer.add_items(wtxn, docid, &embeddings)?; - } - } - - // Every time the is a message in the channel we search - // for new entries in the BBQueue buffers. - write_from_bbqueue( - &mut writer_receiver, - index, - wtxn, - &arroy_writers, - &mut aligned_embedding, - )?; - } - - // Once the extractor/writer channel is closed - // we must process the remaining BBQueue messages. 
- write_from_bbqueue( - &mut writer_receiver, - index, - wtxn, - &arroy_writers, - &mut aligned_embedding, - )?; - } + write_to_db(writer_receiver, finished_extraction, index, wtxn, &arroy_writers)?; indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors); let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?; - 'vectors: { - let span = - tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build"); - let _entered = span.enter(); + indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase); - if index_embeddings.is_empty() { - break 'vectors; - } + build_vectors( + index, + wtxn, + index_embeddings, + &mut arroy_writers, + &indexing_context.must_stop_processing, + )?; - indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase); - let mut rng = rand::rngs::StdRng::seed_from_u64(42); - for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers { - let dimensions = *dimensions; - writer.build_and_quantize( - wtxn, - &mut rng, - dimensions, - false, - &indexing_context.must_stop_processing, - )?; - } - - index.put_embedding_configs(wtxn, index_embeddings)?; - } - - indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets); - if index.facet_search(wtxn)? { - compute_facet_search_database(index, wtxn, global_fields_ids_map)?; - } - - compute_facet_level_database(index, wtxn, facet_field_ids_delta)?; - - indexing_context.progress.update_progress(IndexingStep::PostProcessingWords); - if let Some(prefix_delta) = compute_word_fst(index, wtxn)? 
{ - compute_prefix_database(index, wtxn, prefix_delta, grenad_parameters)?; - } + post_processing::post_process( + indexing_context, + wtxn, + global_fields_ids_map, + facet_field_ids_delta, + )?; indexing_context.progress.update_progress(IndexingStep::Finalizing); @@ -533,321 +195,15 @@ where drop(fields_ids_map_store); let new_fields_ids_map = new_fields_ids_map.into_inner().unwrap(); - index.put_fields_ids_map(wtxn, new_fields_ids_map.as_fields_ids_map())?; - - if let Some(new_primary_key) = new_primary_key { - index.put_primary_key(wtxn, new_primary_key.name())?; - } - - // used to update the localized and weighted maps while sharing the update code with the settings pipeline. - let mut inner_index_settings = InnerIndexSettings::from_index(index, wtxn, Some(embedders))?; - inner_index_settings.recompute_facets(wtxn, index)?; - inner_index_settings.recompute_searchables(wtxn, index)?; - index.put_field_distribution(wtxn, &field_distribution)?; - index.put_documents_ids(wtxn, &document_ids)?; - index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?; + update_index( + index, + wtxn, + new_fields_ids_map, + new_primary_key, + embedders, + field_distribution, + document_ids, + )?; Ok(()) } - -/// A function dedicated to manage all the available BBQueue frames. -/// -/// It reads all the available frames, do the corresponding database operations -/// and stops when no frame are available. 
-fn write_from_bbqueue( - writer_receiver: &mut WriterBbqueueReceiver<'_>, - index: &Index, - wtxn: &mut RwTxn<'_>, - arroy_writers: &HashMap, - aligned_embedding: &mut Vec, -) -> crate::Result<()> { - while let Some(frame_with_header) = writer_receiver.recv_frame() { - match frame_with_header.header() { - EntryHeader::DbOperation(operation) => { - let database_name = operation.database.database_name(); - let database = operation.database.database(index); - let frame = frame_with_header.frame(); - match operation.key_value(frame) { - (key, Some(value)) => { - if let Err(error) = database.put(wtxn, key, value) { - return Err(Error::InternalError(InternalError::StorePut { - database_name, - key: key.into(), - value_length: value.len(), - error, - })); - } - } - (key, None) => match database.delete(wtxn, key) { - Ok(false) => { - unreachable!("We tried to delete an unknown key: {key:?}") - } - Ok(_) => (), - Err(error) => { - return Err(Error::InternalError(InternalError::StoreDeletion { - database_name, - key: key.into(), - error, - })); - } - }, - } - } - EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }) => { - for (_index, (_name, _embedder, writer, dimensions)) in arroy_writers { - let dimensions = *dimensions; - writer.del_items(wtxn, dimensions, docid)?; - } - } - EntryHeader::ArroySetVectors(asvs) => { - let ArroySetVectors { docid, embedder_id, .. 
} = asvs; - let frame = frame_with_header.frame(); - let (_, _, writer, dimensions) = - arroy_writers.get(&embedder_id).expect("requested a missing embedder"); - let mut embeddings = Embeddings::new(*dimensions); - let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding); - embeddings.append(all_embeddings.to_vec()).unwrap(); - writer.del_items(wtxn, *dimensions, docid)?; - writer.add_items(wtxn, docid, &embeddings)?; - } - } - } - - Ok(()) -} - -#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")] -fn compute_prefix_database( - index: &Index, - wtxn: &mut RwTxn, - prefix_delta: PrefixDelta, - grenad_parameters: GrenadParameters, -) -> Result<()> { - let PrefixDelta { modified, deleted } = prefix_delta; - // Compute word prefix docids - compute_word_prefix_docids(wtxn, index, &modified, &deleted, grenad_parameters)?; - // Compute exact word prefix docids - compute_exact_word_prefix_docids(wtxn, index, &modified, &deleted, grenad_parameters)?; - // Compute word prefix fid docids - compute_word_prefix_fid_docids(wtxn, index, &modified, &deleted, grenad_parameters)?; - // Compute word prefix position docids - compute_word_prefix_position_docids(wtxn, index, &modified, &deleted, grenad_parameters) -} - -#[tracing::instrument(level = "trace", skip_all, target = "indexing")] -fn compute_word_fst(index: &Index, wtxn: &mut RwTxn) -> Result> { - let rtxn = index.read_txn()?; - let words_fst = index.words_fst(&rtxn)?; - let mut word_fst_builder = WordFstBuilder::new(&words_fst)?; - let prefix_settings = index.prefix_settings(&rtxn)?; - word_fst_builder.with_prefix_settings(prefix_settings); - - let previous_words = index.word_docids.iter(&rtxn)?.remap_data_type::(); - let current_words = index.word_docids.iter(wtxn)?.remap_data_type::(); - for eob in merge_join_by(previous_words, current_words, |lhs, rhs| match (lhs, rhs) { - (Ok((l, _)), Ok((r, _))) => l.cmp(r), - (Err(_), _) | (_, Err(_)) => Ordering::Equal, - }) { - match 
eob { - EitherOrBoth::Both(lhs, rhs) => { - let (word, lhs_bytes) = lhs?; - let (_, rhs_bytes) = rhs?; - if lhs_bytes != rhs_bytes { - word_fst_builder.register_word(DelAdd::Addition, word.as_ref())?; - } - } - EitherOrBoth::Left(result) => { - let (word, _) = result?; - word_fst_builder.register_word(DelAdd::Deletion, word.as_ref())?; - } - EitherOrBoth::Right(result) => { - let (word, _) = result?; - word_fst_builder.register_word(DelAdd::Addition, word.as_ref())?; - } - } - } - - let (word_fst_mmap, prefix_data) = word_fst_builder.build(index, &rtxn)?; - index.main.remap_types::().put(wtxn, WORDS_FST_KEY, &word_fst_mmap)?; - if let Some(PrefixData { prefixes_fst_mmap, prefix_delta }) = prefix_data { - index.main.remap_types::().put( - wtxn, - WORDS_PREFIXES_FST_KEY, - &prefixes_fst_mmap, - )?; - Ok(Some(prefix_delta)) - } else { - Ok(None) - } -} - -#[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_search")] -fn compute_facet_search_database( - index: &Index, - wtxn: &mut RwTxn, - global_fields_ids_map: GlobalFieldsIdsMap, -) -> Result<()> { - let rtxn = index.read_txn()?; - let localized_attributes_rules = index.localized_attributes_rules(&rtxn)?; - let mut facet_search_builder = FacetSearchBuilder::new( - global_fields_ids_map, - localized_attributes_rules.unwrap_or_default(), - ); - - let previous_facet_id_string_docids = index - .facet_id_string_docids - .iter(&rtxn)? - .remap_data_type::() - .filter(|r| r.as_ref().map_or(true, |(k, _)| k.level == 0)); - let current_facet_id_string_docids = index - .facet_id_string_docids - .iter(wtxn)? 
- .remap_data_type::() - .filter(|r| r.as_ref().map_or(true, |(k, _)| k.level == 0)); - for eob in merge_join_by( - previous_facet_id_string_docids, - current_facet_id_string_docids, - |lhs, rhs| match (lhs, rhs) { - (Ok((l, _)), Ok((r, _))) => l.cmp(r), - (Err(_), _) | (_, Err(_)) => Ordering::Equal, - }, - ) { - match eob { - EitherOrBoth::Both(lhs, rhs) => { - let (_, _) = lhs?; - let (_, _) = rhs?; - } - EitherOrBoth::Left(result) => { - let (key, _) = result?; - facet_search_builder.register_from_key(DelAdd::Deletion, key)?; - } - EitherOrBoth::Right(result) => { - let (key, _) = result?; - facet_search_builder.register_from_key(DelAdd::Addition, key)?; - } - } - } - - facet_search_builder.merge_and_write(index, wtxn, &rtxn) -} - -#[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_field_ids")] -fn compute_facet_level_database( - index: &Index, - wtxn: &mut RwTxn, - facet_field_ids_delta: FacetFieldIdsDelta, -) -> Result<()> { - if let Some(modified_facet_string_ids) = facet_field_ids_delta.modified_facet_string_ids() { - let span = tracing::trace_span!(target: "indexing::facet_field_ids", "string"); - let _entered = span.enter(); - FacetsUpdateBulk::new_not_updating_level_0( - index, - modified_facet_string_ids, - FacetType::String, - ) - .execute(wtxn)?; - } - if let Some(modified_facet_number_ids) = facet_field_ids_delta.modified_facet_number_ids() { - let span = tracing::trace_span!(target: "indexing::facet_field_ids", "number"); - let _entered = span.enter(); - FacetsUpdateBulk::new_not_updating_level_0( - index, - modified_facet_number_ids, - FacetType::Number, - ) - .execute(wtxn)?; - } - - Ok(()) -} - -/// Returns the primary key that has already been set for this index or the -/// one we will guess by searching for the first key that contains "id" as a substring, -/// and whether the primary key changed -/// TODO move this elsewhere -pub fn retrieve_or_guess_primary_key<'a>( - rtxn: &'a RoTxn<'a>, - index: &Index, - 
new_fields_ids_map: &mut FieldsIdsMap, - primary_key_from_op: Option<&'a str>, - first_document: Option>, -) -> Result, bool), UserError>> { - // make sure that we have a declared primary key, either fetching it from the index or attempting to guess it. - - // do we have an existing declared primary key? - let (primary_key, has_changed) = if let Some(primary_key_from_db) = index.primary_key(rtxn)? { - // did we request a primary key in the operation? - match primary_key_from_op { - // we did, and it is different from the DB one - Some(primary_key_from_op) if primary_key_from_op != primary_key_from_db => { - return Ok(Err(UserError::PrimaryKeyCannotBeChanged( - primary_key_from_db.to_string(), - ))); - } - _ => (primary_key_from_db, false), - } - } else { - // no primary key in the DB => let's set one - // did we request a primary key in the operation? - let primary_key = if let Some(primary_key_from_op) = primary_key_from_op { - // set primary key from operation - primary_key_from_op - } else { - // guess primary key - let first_document = match first_document { - Some(document) => document, - // previous indexer when no pk is set + we send an empty payload => index_primary_key_no_candidate_found - None => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)), - }; - - let guesses: Result> = first_document - .keys() - .filter_map(|name| { - let Some(_) = new_fields_ids_map.insert(name) else { - return Some(Err(UserError::AttributeLimitReached.into())); - }; - name.to_lowercase().ends_with(DEFAULT_PRIMARY_KEY).then_some(Ok(name)) - }) - .collect(); - - let mut guesses = guesses?; - - // sort the keys in lexicographical order, so that fields are always in the same order. - guesses.sort_unstable(); - - match guesses.as_slice() { - [] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)), - [name] => { - tracing::info!("Primary key was not specified in index. 
Inferred to '{name}'"); - *name - } - multiple => { - return Ok(Err(UserError::MultiplePrimaryKeyCandidatesFound { - candidates: multiple - .iter() - .map(|candidate| candidate.to_string()) - .collect(), - })) - } - } - }; - (primary_key, true) - }; - - match PrimaryKey::new_or_insert(primary_key, new_fields_ids_map) { - Ok(primary_key) => Ok(Ok((primary_key, has_changed))), - Err(err) => Ok(Err(err)), - } -} - -fn request_threads() -> &'static ThreadPoolNoAbort { - static REQUEST_THREADS: OnceLock = OnceLock::new(); - - REQUEST_THREADS.get_or_init(|| { - ThreadPoolNoAbortBuilder::new() - .num_threads(crate::vector::REQUEST_PARALLELISM) - .thread_name(|index| format!("embedding-request-{index}")) - .build() - .unwrap() - }) -} diff --git a/crates/milli/src/update/new/indexer/post_processing.rs b/crates/milli/src/update/new/indexer/post_processing.rs new file mode 100644 index 000000000..201ab9ec9 --- /dev/null +++ b/crates/milli/src/update/new/indexer/post_processing.rs @@ -0,0 +1,229 @@ +use std::cmp::Ordering; + +use heed::types::{Bytes, DecodeIgnore, Str}; +use heed::RwTxn; +use itertools::{merge_join_by, EitherOrBoth}; + +use super::document_changes::IndexingContext; +use crate::facet::FacetType; +use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY}; +use crate::update::del_add::DelAdd; +use crate::update::facet::new_incremental::FacetsUpdateIncremental; +use crate::update::facet::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; +use crate::update::new::facet_search_builder::FacetSearchBuilder; +use crate::update::new::merger::FacetFieldIdDelta; +use crate::update::new::steps::IndexingStep; +use crate::update::new::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder}; +use crate::update::new::words_prefix_docids::{ + compute_exact_word_prefix_docids, compute_word_prefix_docids, compute_word_prefix_fid_docids, + compute_word_prefix_position_docids, +}; +use crate::update::new::FacetFieldIdsDelta; +use 
crate::update::{FacetsUpdateBulk, GrenadParameters}; +use crate::{GlobalFieldsIdsMap, Index, Result}; + +pub(super) fn post_process( + indexing_context: IndexingContext, + wtxn: &mut RwTxn<'_>, + global_fields_ids_map: GlobalFieldsIdsMap<'_>, + facet_field_ids_delta: FacetFieldIdsDelta, +) -> Result<()> +where + MSP: Fn() -> bool + Sync, +{ + let index = indexing_context.index; + indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets); + if index.facet_search(wtxn)? { + compute_facet_search_database(index, wtxn, global_fields_ids_map)?; + } + compute_facet_level_database(index, wtxn, facet_field_ids_delta)?; + indexing_context.progress.update_progress(IndexingStep::PostProcessingWords); + if let Some(prefix_delta) = compute_word_fst(index, wtxn)? { + compute_prefix_database(index, wtxn, prefix_delta, indexing_context.grenad_parameters)?; + }; + Ok(()) +} + +#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")] +fn compute_prefix_database( + index: &Index, + wtxn: &mut RwTxn, + prefix_delta: PrefixDelta, + grenad_parameters: &GrenadParameters, +) -> Result<()> { + let PrefixDelta { modified, deleted } = prefix_delta; + // Compute word prefix docids + compute_word_prefix_docids(wtxn, index, &modified, &deleted, grenad_parameters)?; + // Compute exact word prefix docids + compute_exact_word_prefix_docids(wtxn, index, &modified, &deleted, grenad_parameters)?; + // Compute word prefix fid docids + compute_word_prefix_fid_docids(wtxn, index, &modified, &deleted, grenad_parameters)?; + // Compute word prefix position docids + compute_word_prefix_position_docids(wtxn, index, &modified, &deleted, grenad_parameters) +} + +#[tracing::instrument(level = "trace", skip_all, target = "indexing")] +fn compute_word_fst(index: &Index, wtxn: &mut RwTxn) -> Result> { + let rtxn = index.read_txn()?; + let words_fst = index.words_fst(&rtxn)?; + let mut word_fst_builder = WordFstBuilder::new(&words_fst)?; + let prefix_settings = 
index.prefix_settings(&rtxn)?; + word_fst_builder.with_prefix_settings(prefix_settings); + + let previous_words = index.word_docids.iter(&rtxn)?.remap_data_type::(); + let current_words = index.word_docids.iter(wtxn)?.remap_data_type::(); + for eob in merge_join_by(previous_words, current_words, |lhs, rhs| match (lhs, rhs) { + (Ok((l, _)), Ok((r, _))) => l.cmp(r), + (Err(_), _) | (_, Err(_)) => Ordering::Equal, + }) { + match eob { + EitherOrBoth::Both(lhs, rhs) => { + let (word, lhs_bytes) = lhs?; + let (_, rhs_bytes) = rhs?; + if lhs_bytes != rhs_bytes { + word_fst_builder.register_word(DelAdd::Addition, word.as_ref())?; + } + } + EitherOrBoth::Left(result) => { + let (word, _) = result?; + word_fst_builder.register_word(DelAdd::Deletion, word.as_ref())?; + } + EitherOrBoth::Right(result) => { + let (word, _) = result?; + word_fst_builder.register_word(DelAdd::Addition, word.as_ref())?; + } + } + } + + let (word_fst_mmap, prefix_data) = word_fst_builder.build(index, &rtxn)?; + index.main.remap_types::().put(wtxn, WORDS_FST_KEY, &word_fst_mmap)?; + if let Some(PrefixData { prefixes_fst_mmap, prefix_delta }) = prefix_data { + index.main.remap_types::().put( + wtxn, + WORDS_PREFIXES_FST_KEY, + &prefixes_fst_mmap, + )?; + Ok(Some(prefix_delta)) + } else { + Ok(None) + } +} + +#[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_search")] +fn compute_facet_search_database( + index: &Index, + wtxn: &mut RwTxn, + global_fields_ids_map: GlobalFieldsIdsMap, +) -> Result<()> { + let rtxn = index.read_txn()?; + let localized_attributes_rules = index.localized_attributes_rules(&rtxn)?; + let mut facet_search_builder = FacetSearchBuilder::new( + global_fields_ids_map, + localized_attributes_rules.unwrap_or_default(), + ); + + let previous_facet_id_string_docids = index + .facet_id_string_docids + .iter(&rtxn)? 
+ .remap_data_type::() + .filter(|r| r.as_ref().map_or(true, |(k, _)| k.level == 0)); + let current_facet_id_string_docids = index + .facet_id_string_docids + .iter(wtxn)? + .remap_data_type::() + .filter(|r| r.as_ref().map_or(true, |(k, _)| k.level == 0)); + for eob in merge_join_by( + previous_facet_id_string_docids, + current_facet_id_string_docids, + |lhs, rhs| match (lhs, rhs) { + (Ok((l, _)), Ok((r, _))) => l.cmp(r), + (Err(_), _) | (_, Err(_)) => Ordering::Equal, + }, + ) { + match eob { + EitherOrBoth::Both(lhs, rhs) => { + let (_, _) = lhs?; + let (_, _) = rhs?; + } + EitherOrBoth::Left(result) => { + let (key, _) = result?; + facet_search_builder.register_from_key(DelAdd::Deletion, key)?; + } + EitherOrBoth::Right(result) => { + let (key, _) = result?; + facet_search_builder.register_from_key(DelAdd::Addition, key)?; + } + } + } + + facet_search_builder.merge_and_write(index, wtxn, &rtxn) +} + +#[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_field_ids")] +fn compute_facet_level_database( + index: &Index, + wtxn: &mut RwTxn, + mut facet_field_ids_delta: FacetFieldIdsDelta, +) -> Result<()> { + for (fid, delta) in facet_field_ids_delta.consume_facet_string_delta() { + let span = tracing::trace_span!(target: "indexing::facet_field_ids", "string"); + let _entered = span.enter(); + match delta { + FacetFieldIdDelta::Bulk => { + tracing::debug!(%fid, "bulk string facet processing"); + FacetsUpdateBulk::new_not_updating_level_0(index, vec![fid], FacetType::String) + .execute(wtxn)? + } + FacetFieldIdDelta::Incremental(delta_data) => { + tracing::debug!(%fid, len=%delta_data.len(), "incremental string facet processing"); + FacetsUpdateIncremental::new( + index, + FacetType::String, + fid, + delta_data, + FACET_GROUP_SIZE, + FACET_MIN_LEVEL_SIZE, + FACET_MAX_GROUP_SIZE, + ) + .execute(wtxn)? 
+ } + } + } + + for (fid, delta) in facet_field_ids_delta.consume_facet_number_delta() { + let span = tracing::trace_span!(target: "indexing::facet_field_ids", "number"); + let _entered = span.enter(); + match delta { + FacetFieldIdDelta::Bulk => { + tracing::debug!(%fid, "bulk number facet processing"); + FacetsUpdateBulk::new_not_updating_level_0(index, vec![fid], FacetType::Number) + .execute(wtxn)? + } + FacetFieldIdDelta::Incremental(delta_data) => { + tracing::debug!(%fid, len=%delta_data.len(), "incremental number facet processing"); + FacetsUpdateIncremental::new( + index, + FacetType::Number, + fid, + delta_data, + FACET_GROUP_SIZE, + FACET_MIN_LEVEL_SIZE, + FACET_MAX_GROUP_SIZE, + ) + .execute(wtxn)? + } + } + debug_assert!(crate::update::facet::sanity_checks( + index, + wtxn, + fid, + FacetType::Number, + FACET_GROUP_SIZE as usize, + FACET_MIN_LEVEL_SIZE as usize, + FACET_MAX_GROUP_SIZE as usize, + ) + .is_ok()); + } + + Ok(()) +} diff --git a/crates/milli/src/update/new/indexer/write.rs b/crates/milli/src/update/new/indexer/write.rs new file mode 100644 index 000000000..01748cf0d --- /dev/null +++ b/crates/milli/src/update/new/indexer/write.rs @@ -0,0 +1,193 @@ +use std::sync::atomic::AtomicBool; + +use bstr::ByteSlice as _; +use hashbrown::HashMap; +use heed::RwTxn; +use rand::SeedableRng as _; +use time::OffsetDateTime; + +use super::super::channel::*; +use crate::documents::PrimaryKey; +use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; +use crate::index::IndexEmbeddingConfig; +use crate::update::settings::InnerIndexSettings; +use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings}; +use crate::{Error, Index, InternalError, Result}; + +pub(super) fn write_to_db( + mut writer_receiver: WriterBbqueueReceiver<'_>, + finished_extraction: &AtomicBool, + index: &Index, + wtxn: &mut RwTxn<'_>, + arroy_writers: &HashMap, +) -> Result<()> { + // Used by by the ArroySetVector to copy the embedding into an + // aligned memory area, 
required by arroy to accept a new vector. + let mut aligned_embedding = Vec::new(); + let span = tracing::trace_span!(target: "indexing::write_db", "all"); + let _entered = span.enter(); + let span = tracing::trace_span!(target: "indexing::write_db", "post_merge"); + let mut _entered_post_merge = None; + while let Some(action) = writer_receiver.recv_action() { + if _entered_post_merge.is_none() + && finished_extraction.load(std::sync::atomic::Ordering::Relaxed) + { + _entered_post_merge = Some(span.enter()); + } + + match action { + ReceiverAction::WakeUp => (), + ReceiverAction::LargeEntry(LargeEntry { database, key, value }) => { + let database_name = database.database_name(); + let database = database.database(index); + if let Err(error) = database.put(wtxn, &key, &value) { + return Err(Error::InternalError(InternalError::StorePut { + database_name, + key: bstr::BString::from(&key[..]), + value_length: value.len(), + error, + })); + } + } + ReceiverAction::LargeVectors(large_vectors) => { + let LargeVectors { docid, embedder_id, .. } = large_vectors; + let (_, _, writer, dimensions) = + arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + let mut embeddings = Embeddings::new(*dimensions); + for embedding in large_vectors.read_embeddings(*dimensions) { + embeddings.push(embedding.to_vec()).unwrap(); + } + writer.del_items(wtxn, *dimensions, docid)?; + writer.add_items(wtxn, docid, &embeddings)?; + } + } + + // Every time the is a message in the channel we search + // for new entries in the BBQueue buffers. 
+ write_from_bbqueue( + &mut writer_receiver, + index, + wtxn, + arroy_writers, + &mut aligned_embedding, + )?; + } + write_from_bbqueue(&mut writer_receiver, index, wtxn, arroy_writers, &mut aligned_embedding)?; + Ok(()) +} + +#[tracing::instrument(level = "trace", skip_all, target = "indexing::vectors")] +pub(super) fn build_vectors( + index: &Index, + wtxn: &mut RwTxn<'_>, + index_embeddings: Vec, + arroy_writers: &mut HashMap, + must_stop_processing: &MSP, +) -> Result<()> +where + MSP: Fn() -> bool + Sync + Send, +{ + if index_embeddings.is_empty() { + return Ok(()); + } + + let mut rng = rand::rngs::StdRng::seed_from_u64(42); + for (_index, (_embedder_name, _embedder, writer, dimensions)) in arroy_writers { + let dimensions = *dimensions; + writer.build_and_quantize(wtxn, &mut rng, dimensions, false, must_stop_processing)?; + } + + index.put_embedding_configs(wtxn, index_embeddings)?; + Ok(()) +} + +pub(super) fn update_index( + index: &Index, + wtxn: &mut RwTxn<'_>, + new_fields_ids_map: FieldIdMapWithMetadata, + new_primary_key: Option>, + embedders: EmbeddingConfigs, + field_distribution: std::collections::BTreeMap, + document_ids: roaring::RoaringBitmap, +) -> Result<()> { + index.put_fields_ids_map(wtxn, new_fields_ids_map.as_fields_ids_map())?; + if let Some(new_primary_key) = new_primary_key { + index.put_primary_key(wtxn, new_primary_key.name())?; + } + let mut inner_index_settings = InnerIndexSettings::from_index(index, wtxn, Some(embedders))?; + inner_index_settings.recompute_facets(wtxn, index)?; + inner_index_settings.recompute_searchables(wtxn, index)?; + index.put_field_distribution(wtxn, &field_distribution)?; + index.put_documents_ids(wtxn, &document_ids)?; + index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?; + Ok(()) +} + +/// A function dedicated to manage all the available BBQueue frames. +/// +/// It reads all the available frames, do the corresponding database operations +/// and stops when no frame are available. 
+pub fn write_from_bbqueue( + writer_receiver: &mut WriterBbqueueReceiver<'_>, + index: &Index, + wtxn: &mut RwTxn<'_>, + arroy_writers: &HashMap, + aligned_embedding: &mut Vec, +) -> crate::Result<()> { + while let Some(frame_with_header) = writer_receiver.recv_frame() { + match frame_with_header.header() { + EntryHeader::DbOperation(operation) => { + let database_name = operation.database.database_name(); + let database = operation.database.database(index); + let frame = frame_with_header.frame(); + match operation.key_value(frame) { + (key, Some(value)) => { + if let Err(error) = database.put(wtxn, key, value) { + return Err(Error::InternalError(InternalError::StorePut { + database_name, + key: key.into(), + value_length: value.len(), + error, + })); + } + } + (key, None) => match database.delete(wtxn, key) { + Ok(false) => { + unreachable!( + "We tried to delete an unknown key from {database_name}: {:?}", + key.as_bstr() + ) + } + Ok(_) => (), + Err(error) => { + return Err(Error::InternalError(InternalError::StoreDeletion { + database_name, + key: key.into(), + error, + })); + } + }, + } + } + EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }) => { + for (_index, (_name, _embedder, writer, dimensions)) in arroy_writers { + let dimensions = *dimensions; + writer.del_items(wtxn, dimensions, docid)?; + } + } + EntryHeader::ArroySetVectors(asvs) => { + let ArroySetVectors { docid, embedder_id, .. 
} = asvs; + let frame = frame_with_header.frame(); + let (_, _, writer, dimensions) = + arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + let mut embeddings = Embeddings::new(*dimensions); + let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding); + embeddings.append(all_embeddings.to_vec()).unwrap(); + writer.del_items(wtxn, *dimensions, docid)?; + writer.add_items(wtxn, docid, &embeddings)?; + } + } + } + + Ok(()) +} diff --git a/crates/milli/src/update/new/merger.rs b/crates/milli/src/update/new/merger.rs index 9e87388a2..090add6bd 100644 --- a/crates/milli/src/update/new/merger.rs +++ b/crates/milli/src/update/new/merger.rs @@ -1,6 +1,6 @@ use std::cell::RefCell; -use hashbrown::HashSet; +use hashbrown::HashMap; use heed::types::Bytes; use heed::{Database, RoTxn}; use memmap2::Mmap; @@ -12,6 +12,7 @@ use super::extract::{ merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, FacetKind, GeoExtractorData, }; +use crate::update::facet::new_incremental::FacetFieldIdChange; use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result}; #[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")] @@ -100,12 +101,18 @@ pub fn merge_and_send_facet_docids<'extractor>( mut caches: Vec>, database: FacetDatabases, index: &Index, + rtxn: &RoTxn, docids_sender: FacetDocidsSender, ) -> Result { + let max_string_count = (index.facet_id_string_docids.len(rtxn)? / 500) as usize; + let max_number_count = (index.facet_id_f64_docids.len(rtxn)? / 500) as usize; + let max_string_count = max_string_count.clamp(1000, 100_000); + let max_number_count = max_number_count.clamp(1000, 100_000); transpose_and_freeze_caches(&mut caches)? 
.into_par_iter() .map(|frozen| { - let mut facet_field_ids_delta = FacetFieldIdsDelta::default(); + let mut facet_field_ids_delta = + FacetFieldIdsDelta::new(max_string_count, max_number_count); let rtxn = index.read_txn()?; merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| { let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?; @@ -126,7 +133,10 @@ pub fn merge_and_send_facet_docids<'extractor>( Ok(facet_field_ids_delta) }) - .reduce(|| Ok(FacetFieldIdsDelta::default()), |lhs, rhs| Ok(lhs?.merge(rhs?))) + .reduce( + || Ok(FacetFieldIdsDelta::new(max_string_count, max_number_count)), + |lhs, rhs| Ok(lhs?.merge(rhs?)), + ) } pub struct FacetDatabases<'a> { @@ -155,60 +165,131 @@ impl<'a> FacetDatabases<'a> { } } -#[derive(Debug, Default)] +#[derive(Debug)] +pub enum FacetFieldIdDelta { + Bulk, + Incremental(Vec), +} + +impl FacetFieldIdDelta { + fn push(&mut self, facet_value: &[u8], max_count: usize) { + *self = match std::mem::replace(self, FacetFieldIdDelta::Bulk) { + FacetFieldIdDelta::Bulk => FacetFieldIdDelta::Bulk, + FacetFieldIdDelta::Incremental(mut v) => { + if v.len() >= max_count { + FacetFieldIdDelta::Bulk + } else { + v.push(FacetFieldIdChange { facet_value: facet_value.into() }); + FacetFieldIdDelta::Incremental(v) + } + } + } + } + + fn merge(&mut self, rhs: Option, max_count: usize) { + let Some(rhs) = rhs else { + return; + }; + *self = match (std::mem::replace(self, FacetFieldIdDelta::Bulk), rhs) { + (FacetFieldIdDelta::Bulk, _) | (_, FacetFieldIdDelta::Bulk) => FacetFieldIdDelta::Bulk, + ( + FacetFieldIdDelta::Incremental(mut left), + FacetFieldIdDelta::Incremental(mut right), + ) => { + if left.len() + right.len() >= max_count { + FacetFieldIdDelta::Bulk + } else { + left.append(&mut right); + FacetFieldIdDelta::Incremental(left) + } + } + }; + } +} + +#[derive(Debug)] pub struct FacetFieldIdsDelta { /// The field ids that have been modified - modified_facet_string_ids: HashSet, - modified_facet_number_ids: HashSet, 
+ modified_facet_string_ids: HashMap, + modified_facet_number_ids: HashMap, + max_string_count: usize, + max_number_count: usize, } impl FacetFieldIdsDelta { - fn register_facet_string_id(&mut self, field_id: FieldId) { - self.modified_facet_string_ids.insert(field_id); + pub fn new(max_string_count: usize, max_number_count: usize) -> Self { + Self { + max_string_count, + max_number_count, + modified_facet_string_ids: Default::default(), + modified_facet_number_ids: Default::default(), + } } - fn register_facet_number_id(&mut self, field_id: FieldId) { - self.modified_facet_number_ids.insert(field_id); + fn register_facet_string_id(&mut self, field_id: FieldId, facet_value: &[u8]) { + self.modified_facet_string_ids + .entry(field_id) + .or_insert(FacetFieldIdDelta::Incremental(Default::default())) + .push(facet_value, self.max_string_count); + } + + fn register_facet_number_id(&mut self, field_id: FieldId, facet_value: &[u8]) { + self.modified_facet_number_ids + .entry(field_id) + .or_insert(FacetFieldIdDelta::Incremental(Default::default())) + .push(facet_value, self.max_number_count); } fn register_from_key(&mut self, key: &[u8]) { - let (facet_kind, field_id) = self.extract_key_data(key); - match facet_kind { - FacetKind::Number => self.register_facet_number_id(field_id), - FacetKind::String => self.register_facet_string_id(field_id), + let (facet_kind, field_id, facet_value) = self.extract_key_data(key); + match (facet_kind, facet_value) { + (FacetKind::Number, Some(facet_value)) => { + self.register_facet_number_id(field_id, facet_value) + } + (FacetKind::String, Some(facet_value)) => { + self.register_facet_string_id(field_id, facet_value) + } _ => (), } } - fn extract_key_data(&self, key: &[u8]) -> (FacetKind, FieldId) { + fn extract_key_data<'key>(&self, key: &'key [u8]) -> (FacetKind, FieldId, Option<&'key [u8]>) { let facet_kind = FacetKind::from(key[0]); let field_id = FieldId::from_be_bytes([key[1], key[2]]); - (facet_kind, field_id) + let facet_value = 
if key.len() >= 4 { + // level is also stored in the key at [3] (always 0) + Some(&key[4..]) + } else { + None + }; + + (facet_kind, field_id, facet_value) } - pub fn modified_facet_string_ids(&self) -> Option> { - if self.modified_facet_string_ids.is_empty() { - None - } else { - Some(self.modified_facet_string_ids.iter().copied().collect()) - } + pub fn consume_facet_string_delta( + &mut self, + ) -> impl Iterator + '_ { + self.modified_facet_string_ids.drain() } - pub fn modified_facet_number_ids(&self) -> Option> { - if self.modified_facet_number_ids.is_empty() { - None - } else { - Some(self.modified_facet_number_ids.iter().copied().collect()) - } + pub fn consume_facet_number_delta( + &mut self, + ) -> impl Iterator + '_ { + self.modified_facet_number_ids.drain() } pub fn merge(mut self, rhs: Self) -> Self { - let Self { modified_facet_number_ids, modified_facet_string_ids } = rhs; - modified_facet_number_ids.into_iter().for_each(|fid| { - self.modified_facet_number_ids.insert(fid); + // rhs.max_xx_count is assumed to be equal to self.max_xx_count, and so gets unused + let Self { modified_facet_number_ids, modified_facet_string_ids, .. 
} = rhs; + modified_facet_number_ids.into_iter().for_each(|(fid, mut delta)| { + let old_delta = self.modified_facet_number_ids.remove(&fid); + delta.merge(old_delta, self.max_number_count); + self.modified_facet_number_ids.insert(fid, delta); }); - modified_facet_string_ids.into_iter().for_each(|fid| { - self.modified_facet_string_ids.insert(fid); + modified_facet_string_ids.into_iter().for_each(|(fid, mut delta)| { + let old_delta = self.modified_facet_string_ids.remove(&fid); + delta.merge(old_delta, self.max_string_count); + self.modified_facet_string_ids.insert(fid, delta); }); self } diff --git a/crates/milli/src/update/new/mod.rs b/crates/milli/src/update/new/mod.rs index 87995ee55..b7e08a461 100644 --- a/crates/milli/src/update/new/mod.rs +++ b/crates/milli/src/update/new/mod.rs @@ -16,6 +16,7 @@ pub mod indexer; mod merger; mod parallel_iterator_ext; mod ref_cell_ext; +pub mod reindex; pub(crate) mod steps; pub(crate) mod thread_local; pub mod vector_document; diff --git a/crates/milli/src/update/new/reindex.rs b/crates/milli/src/update/new/reindex.rs new file mode 100644 index 000000000..6bfeb123e --- /dev/null +++ b/crates/milli/src/update/new/reindex.rs @@ -0,0 +1,38 @@ +use heed::RwTxn; + +use super::document::{Document, DocumentFromDb}; +use crate::progress::{self, AtomicSubStep, Progress}; +use crate::{FieldDistribution, Index, Result}; + +pub fn field_distribution(index: &Index, wtxn: &mut RwTxn<'_>, progress: &Progress) -> Result<()> { + let mut distribution = FieldDistribution::new(); + + let document_count = index.number_of_documents(wtxn)?; + let field_id_map = index.fields_ids_map(wtxn)?; + + let (update_document_count, sub_step) = + AtomicSubStep::::new(document_count as u32); + progress.update_progress(sub_step); + + let docids = index.documents_ids(wtxn)?; + + for docid in docids { + update_document_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + + let Some(document) = DocumentFromDb::new(docid, wtxn, index, &field_id_map)? 
else { + continue; + }; + let geo_iter = document.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv))); + for res in document.iter_top_level_fields().chain(geo_iter) { + let (field_name, _) = res?; + if let Some(count) = distribution.get_mut(field_name) { + *count += 1; + } else { + distribution.insert(field_name.to_owned(), 1); + } + } + } + + index.put_field_distribution(wtxn, &distribution)?; + Ok(()) +} diff --git a/crates/milli/src/update/new/thread_local.rs b/crates/milli/src/update/new/thread_local.rs index acdc78c7b..a5af2b0bb 100644 --- a/crates/milli/src/update/new/thread_local.rs +++ b/crates/milli/src/update/new/thread_local.rs @@ -29,9 +29,9 @@ use std::cell::RefCell; /// - An example of a type that verifies (1) and (2) is [`std::rc::Rc`] (when `T` is `Send` and `Sync`). /// - An example of a type that doesn't verify (1) is thread-local data. /// - An example of a type that doesn't verify (2) is [`std::sync::MutexGuard`]: a lot of mutex implementations require that -/// a lock is returned to the operating system on the same thread that initially locked the mutex, failing to uphold this -/// invariant will cause Undefined Behavior -/// (see last § in [the nomicon](https://doc.rust-lang.org/nomicon/send-and-sync.html)). +/// a lock is returned to the operating system on the same thread that initially locked the mutex, failing to uphold this +/// invariant will cause Undefined Behavior +/// (see last § in [the nomicon](https://doc.rust-lang.org/nomicon/send-and-sync.html)). /// /// It is **always safe** to implement this trait on a type that is `Send`, but no placeholder impl is provided due to limitations in /// coherency. Use the [`FullySend`] wrapper in this situation. @@ -86,7 +86,7 @@ impl MostlySendWrapper { /// # Safety /// /// 1. `T` is [`MostlySend`], so by its safety contract it can be accessed by any thread and all of its operations are available -/// from any thread. +/// from any thread. /// 2. 
(P1) of `MostlySendWrapper::new` forces the user to never access the value from multiple threads concurrently. unsafe impl Send for MostlySendWrapper {} diff --git a/crates/milli/src/update/new/words_prefix_docids.rs b/crates/milli/src/update/new/words_prefix_docids.rs index bf64049c3..7ba2b9b71 100644 --- a/crates/milli/src/update/new/words_prefix_docids.rs +++ b/crates/milli/src/update/new/words_prefix_docids.rs @@ -25,7 +25,7 @@ impl WordPrefixDocids { fn new( database: Database, prefix_database: Database, - grenad_parameters: GrenadParameters, + grenad_parameters: &GrenadParameters, ) -> WordPrefixDocids { WordPrefixDocids { database, @@ -161,7 +161,7 @@ impl WordPrefixIntegerDocids { fn new( database: Database, prefix_database: Database, - grenad_parameters: GrenadParameters, + grenad_parameters: &GrenadParameters, ) -> WordPrefixIntegerDocids { WordPrefixIntegerDocids { database, @@ -311,7 +311,7 @@ pub fn compute_word_prefix_docids( index: &Index, prefix_to_compute: &BTreeSet, prefix_to_delete: &BTreeSet, - grenad_parameters: GrenadParameters, + grenad_parameters: &GrenadParameters, ) -> Result<()> { WordPrefixDocids::new( index.word_docids.remap_key_type(), @@ -327,7 +327,7 @@ pub fn compute_exact_word_prefix_docids( index: &Index, prefix_to_compute: &BTreeSet, prefix_to_delete: &BTreeSet, - grenad_parameters: GrenadParameters, + grenad_parameters: &GrenadParameters, ) -> Result<()> { WordPrefixDocids::new( index.exact_word_docids.remap_key_type(), @@ -343,7 +343,7 @@ pub fn compute_word_prefix_fid_docids( index: &Index, prefix_to_compute: &BTreeSet, prefix_to_delete: &BTreeSet, - grenad_parameters: GrenadParameters, + grenad_parameters: &GrenadParameters, ) -> Result<()> { WordPrefixIntegerDocids::new( index.word_fid_docids.remap_key_type(), @@ -359,7 +359,7 @@ pub fn compute_word_prefix_position_docids( index: &Index, prefix_to_compute: &BTreeSet, prefix_to_delete: &BTreeSet, - grenad_parameters: GrenadParameters, + grenad_parameters: &GrenadParameters, ) 
-> Result<()> { WordPrefixIntegerDocids::new( index.word_position_docids.remap_key_type(), diff --git a/crates/milli/src/vector/error.rs b/crates/milli/src/vector/error.rs index 97bbe5d68..5edabed0d 100644 --- a/crates/milli/src/vector/error.rs +++ b/crates/milli/src/vector/error.rs @@ -86,9 +86,9 @@ pub enum EmbedErrorKind { }, option_info(.0.as_deref(), "server replied with "))] RestBadRequest(Option, ConfigurationSource), - #[error("received internal error HTTP {0} from embedding server{}", option_info(.1.as_deref(), "server replied with "))] + #[error("received internal error HTTP {} from embedding server{}", .0, option_info(.1.as_deref(), "server replied with "))] RestInternalServerError(u16, Option), - #[error("received unexpected HTTP {0} from embedding server{}", option_info(.1.as_deref(), "server replied with "))] + #[error("received unexpected HTTP {} from embedding server{}", .0, option_info(.1.as_deref(), "server replied with "))] RestOtherStatusCode(u16, Option), #[error("could not reach embedding server:\n - {0}")] RestNetwork(ureq::Transport), diff --git a/crates/milli/src/vector/hf.rs b/crates/milli/src/vector/hf.rs index 3fe28e53a..447a88f5d 100644 --- a/crates/milli/src/vector/hf.rs +++ b/crates/milli/src/vector/hf.rs @@ -163,8 +163,10 @@ impl Embedder { let token_ids = Tensor::stack(&token_ids, 0).map_err(EmbedError::tensor_shape)?; let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?; - let embeddings = - self.model.forward(&token_ids, &token_type_ids).map_err(EmbedError::model_forward)?; + let embeddings = self + .model + .forward(&token_ids, &token_type_ids, None) + .map_err(EmbedError::model_forward)?; // Apply some avg-pooling by taking the mean embedding value for all tokens (including padding) let (_n_sentence, n_tokens, _hidden_size) = @@ -185,8 +187,10 @@ impl Embedder { Tensor::new(token_ids, &self.model.device).map_err(EmbedError::tensor_shape)?; let token_ids = Tensor::stack(&[token_ids], 
0).map_err(EmbedError::tensor_shape)?; let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?; - let embeddings = - self.model.forward(&token_ids, &token_type_ids).map_err(EmbedError::model_forward)?; + let embeddings = self + .model + .forward(&token_ids, &token_type_ids, None) + .map_err(EmbedError::model_forward)?; // Apply some avg-pooling by taking the mean embedding value for all tokens (including padding) let (_n_sentence, n_tokens, _hidden_size) = diff --git a/crates/milli/src/vector/mod.rs b/crates/milli/src/vector/mod.rs index a1d71ef93..0be698027 100644 --- a/crates/milli/src/vector/mod.rs +++ b/crates/milli/src/vector/mod.rs @@ -9,6 +9,7 @@ use heed::{RoTxn, RwTxn, Unspecified}; use ordered_float::OrderedFloat; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; use self::error::{EmbedError, NewEmbedderError}; use crate::prompt::{Prompt, PromptData}; @@ -710,18 +711,20 @@ impl Embedder { /// /// The intended use is to make the similarity score more comparable to the regular ranking score. /// This allows to correct effects where results are too "packed" around a certain value. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize, Serialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize, Serialize, ToSchema)] #[serde(from = "DistributionShiftSerializable")] #[serde(into = "DistributionShiftSerializable")] pub struct DistributionShift { /// Value where the results are "packed". /// /// Similarity scores are translated so that they are packed around 0.5 instead + #[schema(value_type = f32)] pub current_mean: OrderedFloat, /// standard deviation of a similarity score. /// /// Set below 0.4 to make the results less packed around the mean, and above 0.4 to make them more packed. 
+ #[schema(value_type = f32)] pub current_sigma: OrderedFloat, } diff --git a/crates/milli/src/vector/openai.rs b/crates/milli/src/vector/openai.rs index 7262bfef8..938c04fe3 100644 --- a/crates/milli/src/vector/openai.rs +++ b/crates/milli/src/vector/openai.rs @@ -1,3 +1,4 @@ +use std::fmt; use std::time::Instant; use ordered_float::OrderedFloat; @@ -168,7 +169,6 @@ fn infer_api_key() -> String { .unwrap_or_default() } -#[derive(Debug)] pub struct Embedder { tokenizer: tiktoken_rs::CoreBPE, rest_embedder: RestEmbedder, @@ -302,3 +302,13 @@ impl Embedder { self.options.distribution() } } + +impl fmt::Debug for Embedder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Embedder") + .field("tokenizer", &"CoreBPE") + .field("rest_embedder", &self.rest_embedder) + .field("options", &self.options) + .finish() + } +} diff --git a/crates/milli/src/vector/rest.rs b/crates/milli/src/vector/rest.rs index 98be311d4..eb05bac64 100644 --- a/crates/milli/src/vector/rest.rs +++ b/crates/milli/src/vector/rest.rs @@ -175,7 +175,7 @@ impl Embedder { pub fn embed_tokens( &self, - tokens: &[usize], + tokens: &[u32], deadline: Option, ) -> Result { let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions), deadline)?; diff --git a/crates/milli/src/vector/settings.rs b/crates/milli/src/vector/settings.rs index d1cf364a2..4a1b1882c 100644 --- a/crates/milli/src/vector/settings.rs +++ b/crates/milli/src/vector/settings.rs @@ -4,6 +4,7 @@ use std::num::NonZeroUsize; use deserr::Deserr; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; use super::{ollama, openai, DistributionShift}; use crate::prompt::{default_max_bytes, PromptData}; @@ -11,48 +12,61 @@ use crate::update::Setting; use crate::vector::EmbeddingConfig; use crate::UserError; -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] 
#[serde(deny_unknown_fields, rename_all = "camelCase")] #[deserr(rename_all = camelCase, deny_unknown_fields)] pub struct EmbeddingSettings { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub source: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub model: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub revision: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub api_key: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub dimensions: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub binary_quantized: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub document_template: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub document_template_max_bytes: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub url: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub request: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub response: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option>)] pub headers: Setting>, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + #[schema(value_type = Option)] pub distribution: Setting, } @@ -539,7 +553,7 @@ impl 
EmbeddingSettings { } } -#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] +#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] #[serde(deny_unknown_fields, rename_all = "camelCase")] #[deserr(rename_all = camelCase, deny_unknown_fields)] pub enum EmbedderSource { diff --git a/crates/tracing-trace/Cargo.toml b/crates/tracing-trace/Cargo.toml index 9afada3c2..a8a2d5d8b 100644 --- a/crates/tracing-trace/Cargo.toml +++ b/crates/tracing-trace/Cargo.toml @@ -8,17 +8,17 @@ edition = "2021" [dependencies] color-spantrace = "0.2.1" fxprof-processed-profile = "0.7.0" -serde = { version = "1.0.204", features = ["derive"] } -serde_json = "1.0.120" -tracing = "0.1.40" -tracing-error = "0.2.0" -tracing-subscriber = "0.3.18" -byte-unit = { version = "5.1.4", default-features = false, features = [ +serde = { version = "1.0.217", features = ["derive"] } +serde_json = "1.0.135" +tracing = "0.1.41" +tracing-error = "0.2.1" +tracing-subscriber = "0.3.19" +byte-unit = { version = "5.1.6", default-features = false, features = [ "std", "byte", "serde", ] } -tokio = { version = "1.38.0", features = ["sync"] } +tokio = { version = "1.42.0", features = ["sync"] } [target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies] -libproc = "0.14.8" +libproc = "0.14.10" diff --git a/crates/xtask/Cargo.toml b/crates/xtask/Cargo.toml index 79bed3d2e..496e1d362 100644 --- a/crates/xtask/Cargo.toml +++ b/crates/xtask/Cargo.toml @@ -11,34 +11,34 @@ license.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -anyhow = "1.0.86" +anyhow = "1.0.95" build-info = { version = "1.7.0", path = "../build-info" } -cargo_metadata = "0.18.1" -clap = { version = "4.5.9", features = ["derive"] } -futures-core = "0.3.30" -futures-util = "0.3.30" -reqwest = { version = "0.12.5", features = [ +cargo_metadata = "0.19.1" +clap = { version = "4.5.24", 
features = ["derive"] } +futures-core = "0.3.31" +futures-util = "0.3.31" +reqwest = { version = "0.12.12", features = [ "stream", "json", "rustls-tls", ], default-features = false } -serde = { version = "1.0.204", features = ["derive"] } -serde_json = "1.0.120" +serde = { version = "1.0.217", features = ["derive"] } +serde_json = "1.0.135" sha2 = "0.10.8" -sysinfo = "0.30.13" -time = { version = "0.3.36", features = [ +sysinfo = "0.33.1" +time = { version = "0.3.37", features = [ "serde", "serde-human-readable", "macros", ] } -tokio = { version = "1.38.0", features = [ +tokio = { version = "1.42.0", features = [ "rt", "net", "time", "process", "signal", ] } -tracing = "0.1.40" -tracing-subscriber = "0.3.18" +tracing = "0.1.41" +tracing-subscriber = "0.3.19" tracing-trace = { version = "0.1.0", path = "../tracing-trace" } -uuid = { version = "1.10.0", features = ["v7", "serde"] } +uuid = { version = "1.11.0", features = ["v7", "serde"] } diff --git a/crates/xtask/src/bench/env_info.rs b/crates/xtask/src/bench/env_info.rs index 08dacf915..877e7c2ff 100644 --- a/crates/xtask/src/bench/env_info.rs +++ b/crates/xtask/src/bench/env_info.rs @@ -27,8 +27,7 @@ impl Environment { let unknown_string = String::from("Unknown"); let mut system = System::new(); - system.refresh_cpu(); - system.refresh_cpu_frequency(); + system.refresh_cpu_all(); system.refresh_memory(); let (cpu, frequency) = match system.cpus().first() { @@ -50,9 +49,7 @@ impl Environment { if let Some(os) = System::os_version() { software.push(VersionInfo { name: os, version: String::from("kernel-release") }); } - if let Some(arch) = System::cpu_arch() { - software.push(VersionInfo { name: arch, version: String::from("arch") }); - } + software.push(VersionInfo { name: System::cpu_arch(), version: String::from("arch") }); Self { hostname: System::host_name(), diff --git a/download-latest.sh b/download-latest.sh index c533d6616..b74722586 100644 --- a/download-latest.sh +++ b/download-latest.sh @@ -33,10 +33,12 
@@ get_latest() { exit 1 fi - if [ -z "$GITHUB_PAT" ]; then - curl -s "$latest_release" > "$temp_file" || return 1 - else + if [ -n "$GITHUB_TOKEN" ]; then + curl -H "Authorization: Bearer $GITHUB_TOKEN" -s "$latest_release" > "$temp_file" || return 1 + elif [ -n "$GITHUB_PAT" ]; then curl -H "Authorization: token $GITHUB_PAT" -s "$latest_release" > "$temp_file" || return 1 + else + curl -s "$latest_release" > "$temp_file" || return 1 fi latest="$(cat "$temp_file" | grep '"tag_name":' | cut -d ':' -f2 | tr -d '"' | tr -d ',' | tr -d ' ')" diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 24473f6c8..17116ad8d 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "1.79.0" +channel = "1.81.0" components = ["clippy"]