From 567194b92550aff6abcdf19bfc6ade26ff1552dc Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 19 Mar 2024 16:56:21 +0100 Subject: [PATCH] Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1" This reverts commit bd74cce86a5e610477cc68bd555a636bcd735ca3, reversing changes made to d2f77e88bd9f9ce018a71e78c120274336e80580. --- .github/workflows/bench-pr.yml | 2 +- .github/workflows/milestone-workflow.yml | 19 - CONTRIBUTING.md | 2 +- Cargo.lock | 195 +++++++---- Cargo.toml | 2 +- meilisearch-types/Cargo.toml | 2 +- meilisearch/Cargo.toml | 12 +- meilisearch/src/main.rs | 2 +- meilisearch/src/option.rs | 4 +- meilisearch/src/routes/indexes/settings.rs | 1 - meilisearch/src/search.rs | 21 +- meilisearch/tests/documents/add_documents.rs | 239 +------------ meilisearch/tests/search/facet_search.rs | 43 --- milli/src/index.rs | 14 +- milli/src/lib.rs | 7 +- milli/src/order_by_map.rs | 57 --- milli/src/search/facet/facet_range_search.rs | 4 +- milli/src/search/facet/mod.rs | 3 - milli/src/search/facet/search.rs | 326 ------------------ milli/src/search/mod.rs | 249 ++++++++++++- milli/src/search/new/tests/typo_proximity.rs | 2 +- milli/src/update/settings.rs | 14 +- milli/src/vector/error.rs | 39 --- milli/src/vector/mod.rs | 18 - milli/src/vector/ollama.rs | 307 ----------------- milli/src/vector/openai.rs | 20 +- milli/src/vector/settings.rs | 29 +- workloads/settings-add-remove-filters.json | 94 ----- workloads/settings-proximity-precision.json | 86 ----- .../settings-remove-add-swap-searchable.json | 114 ------ workloads/settings-typo.json | 115 ------ xtask/src/bench/dashboard.rs | 320 ++++++++--------- xtask/src/bench/mod.rs | 24 +- xtask/src/bench/workload.rs | 16 +- 34 files changed, 614 insertions(+), 1788 deletions(-) delete mode 100644 milli/src/order_by_map.rs delete mode 100644 milli/src/search/facet/search.rs delete mode 100644 milli/src/vector/ollama.rs delete mode 100644 workloads/settings-add-remove-filters.json delete mode 100644 workloads/settings-proximity-precision.json delete mode 100644 workloads/settings-remove-add-swap-searchable.json delete mode 100644 workloads/settings-typo.json diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml index 418a23717..6f4956542 100644 --- a/.github/workflows/bench-pr.yml +++ b/.github/workflows/bench-pr.yml @@ -43,4 +43,4 @@ jobs: - name: Run benchmarks on PR ${{ github.event.issue.id }} run: | - cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "[Comment](${{ github.event.comment.html_url }}) on [#${{ github.event.issue.number }}](${{ github.event.issue.html_url }})" -- ${{ steps.command.outputs.command-arguments }} \ No newline at end of file + cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "[Comment](${{ github.event.comment.url }}) on [#${{github.event.issue.id}}](${{ github.event.issue.url }})" -- ${{ steps.command.outputs.command-arguments }} \ No newline at end of file diff --git a/.github/workflows/milestone-workflow.yml b/.github/workflows/milestone-workflow.yml index 2ede3dc21..2b8b7bf62 100644 --- a/.github/workflows/milestone-workflow.yml +++ b/.github/workflows/milestone-workflow.yml @@ -110,25 +110,6 @@ jobs: --milestone $MILESTONE_VERSION \ --assignee curquiza - create-update-version-issue: - needs: get-release-version - # Create the changelog issue if the release is not only a patch release - if: github.event.action == 'created' - runs-on: ubuntu-latest - env: - ISSUE_TEMPLATE: issue-template.md - steps: - - uses: actions/checkout@v3 - - name: Download the issue template - run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-version-issue.md > $ISSUE_TEMPLATE - - name: Create the issue - run: | - gh issue create \ - --title "Update version in Cargo.toml for $MILESTONE_VERSION" \ - --label 'maintenance' \ - --body-file $ISSUE_TEMPLATE \ - --milestone $MILESTONE_VERSION - # ---------------- # MILESTONE CLOSED # ---------------- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 24034aba6..073da7031 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,7 +4,7 @@ First, thank you for contributing to Meilisearch! The goal of this document is t Remember that there are many ways to contribute other than writing code: writing [tutorials or blog posts](https://github.com/meilisearch/awesome-meilisearch), improving [the documentation](https://github.com/meilisearch/documentation), submitting [bug reports](https://github.com/meilisearch/meilisearch/issues/new?assignees=&labels=&template=bug_report.md&title=) and [feature requests](https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal)... -Meilisearch can manage multiple indexes, handle the update store, and expose an HTTP API. Search and indexation are the domain of our core engine, [`milli`](https://github.com/meilisearch/meilisearch/tree/main/milli), while tokenization is handled by [our `charabia` library](https://github.com/meilisearch/charabia/). +The code in this repository is only concerned with managing multiple indexes, handling the update store, and exposing an HTTP API. Search and indexation are the domain of our core engine, [`milli`](https://github.com/meilisearch/milli), while tokenization is handled by [our `charabia` library](https://github.com/meilisearch/charabia/). If Meilisearch does not offer optimized support for your language, please consider contributing to `charabia` by following the [CONTRIBUTING.md file](https://github.com/meilisearch/charabia/blob/main/CONTRIBUTING.md) and integrating your intended normalizer/segmenter. diff --git a/Cargo.lock b/Cargo.lock index bdca7e24c..a1527c31c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -36,9 +36,9 @@ dependencies = [ [[package]] name = "actix-http" -version = "3.6.0" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d223b13fd481fc0d1f83bb12659ae774d9e3601814c68a0bc539731698cca743" +checksum = "129d4c88e98860e1758c5de288d1632b07970a16d59bdf7b8d66053d582bb71f" dependencies = [ "actix-codec", "actix-rt", @@ -138,9 +138,9 @@ dependencies = [ [[package]] name = "actix-tls" -version = "3.3.0" +version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4cce60a2f2b477bc72e5cde0af1812a6e82d8fd85b5570a5dcf2a5bf2c5be5f" +checksum = "72616e7fbec0aa99c6f3164677fa48ff5a60036d0799c98cab894a44f3e0efc3" dependencies = [ "actix-rt", "actix-service", @@ -148,11 +148,13 @@ dependencies = [ "futures-core", "impl-more", "pin-project-lite", + "rustls 0.21.6", + "rustls-webpki", "tokio", - "tokio-rustls", + "tokio-rustls 0.23.4", "tokio-util", "tracing", - "webpki-roots", + "webpki-roots 0.22.6", ] [[package]] @@ -167,9 +169,9 @@ dependencies = [ [[package]] name = "actix-web" -version = "4.5.1" +version = "4.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a6556ddebb638c2358714d853257ed226ece6023ef9364f23f0c70737ea984" +checksum = "e43428f3bf11dee6d166b00ec2df4e3aa8cc1606aaa0b7433c146852e2f4e03b" dependencies = [ "actix-codec", "actix-http", @@ -257,9 +259,9 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.11" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff" dependencies = [ "cfg-if", "getrandom", @@ -494,7 +496,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "benchmarks" -version = "1.8.0" +version = "1.7.1" dependencies = [ "anyhow", "bytes", @@ -628,7 +630,7 @@ dependencies = [ [[package]] name = "build-info" -version = "1.8.0" +version = "1.7.1" dependencies = [ "anyhow", "time", @@ -833,9 +835,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.83" +version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +checksum = "305fe645edc1442a0fa8b6726ba61d422798d37a52e12eaecf4b022ebbb88f01" dependencies = [ "jobserver", "libc", @@ -1529,7 +1531,7 @@ dependencies = [ [[package]] name = "dump" -version = "1.8.0" +version = "1.7.1" dependencies = [ "anyhow", "big_s", @@ -1767,7 +1769,7 @@ dependencies = [ [[package]] name = "file-store" -version = "1.8.0" +version = "1.7.1" dependencies = [ "faux", "tempfile", @@ -1790,7 +1792,7 @@ dependencies = [ [[package]] name = "filter-parser" -version = "1.8.0" +version = "1.7.1" dependencies = [ "insta", "nom", @@ -1810,7 +1812,7 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "1.8.0" +version = "1.7.1" dependencies = [ "criterion", "serde_json", @@ -1928,7 +1930,7 @@ dependencies = [ [[package]] name = "fuzzers" -version = "1.8.0" +version = "1.7.1" dependencies = [ "arbitrary", "clap", @@ -2102,10 +2104,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", - "js-sys", "libc", "wasi", - "wasm-bindgen", ] [[package]] @@ -2224,7 +2224,7 @@ dependencies = [ "atomic-polyfill", "hash32", "rustc_version", - "spin", + "spin 0.9.8", "stable_deref_trait", ] @@ -2393,9 +2393,9 @@ dependencies = [ "futures-util", "http 0.2.11", "hyper", - "rustls", + "rustls 0.21.6", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.1", ] [[package]] @@ -2422,7 +2422,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d" [[package]] name = "index-scheduler" -version = "1.8.0" +version = "1.7.1" dependencies = [ "anyhow", "big_s", @@ -2609,7 +2609,7 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "1.8.0" +version = "1.7.1" dependencies = [ "criterion", "serde_json", @@ -2617,14 +2617,13 @@ dependencies = [ [[package]] name = "jsonwebtoken" -version = "9.2.0" +version = "8.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c7ea04a7c5c055c175f189b6dc6ba036fd62306b58c66c9f6389036c503a3f4" +checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378" dependencies = [ "base64 0.21.7", - "js-sys", "pem", - "ring", + "ring 0.16.20", "serde", "serde_json", "simple_asn1", @@ -3118,7 +3117,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "meili-snap" -version = "1.8.0" +version = "1.7.1" dependencies = [ "insta", "md5", @@ -3127,7 +3126,7 @@ dependencies = [ [[package]] name = "meilisearch" -version = "1.8.0" +version = "1.7.1" dependencies = [ "actix-cors", "actix-http", @@ -3185,7 +3184,7 @@ dependencies = [ "rayon", "regex", "reqwest", - "rustls", + "rustls 0.20.9", "rustls-pemfile", "segment", "serde", @@ -3220,7 +3219,7 @@ dependencies = [ [[package]] name = "meilisearch-auth" -version = "1.8.0" +version = "1.7.1" dependencies = [ "base64 0.21.7", "enum-iterator", @@ -3239,7 +3238,7 @@ dependencies = [ [[package]] name = "meilisearch-types" -version = "1.8.0" +version = "1.7.1" dependencies = [ "actix-web", "anyhow", @@ -3269,7 +3268,7 @@ dependencies = [ [[package]] name = "meilitool" -version = "1.8.0" +version = "1.7.1" dependencies = [ "anyhow", "clap", @@ -3308,7 +3307,7 @@ dependencies = [ [[package]] name = "milli" -version = "1.8.0" +version = "1.7.1" dependencies = [ "arroy", "big_s", @@ -3414,9 +3413,9 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.11" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0" dependencies = [ "libc", "log", @@ -3734,12 +3733,11 @@ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" [[package]] name = "pem" -version = "3.0.3" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310" +checksum = "a8835c273a76a90455d7344889b0964598e3316e2a79ede8e36f16bdcf2228b8" dependencies = [ - "base64 0.21.7", - "serde", + "base64 0.13.1", ] [[package]] @@ -3750,7 +3748,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "permissive-json-pointer" -version = "1.8.0" +version = "1.7.1" dependencies = [ "big_s", "serde_json", @@ -4241,14 +4239,14 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls", + "rustls 0.21.6", "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", "system-configuration", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.1", "tokio-util", "tower-service", "url", @@ -4256,7 +4254,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots", + "webpki-roots 0.25.3", "winreg", ] @@ -4274,15 +4272,30 @@ checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1" [[package]] name = "ring" -version = "0.17.7" +version = "0.16.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin 0.5.2", + "untrusted 0.7.1", + "web-sys", + "winapi", +] + +[[package]] +name = "ring" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9babe80d5c16becf6594aa32ad2be8fe08498e7ae60b77de8df700e67f191d7e" dependencies = [ "cc", "getrandom", "libc", - "spin", - "untrusted", + "spin 0.9.8", + "untrusted 0.9.0", "windows-sys 0.48.0", ] @@ -4360,12 +4373,24 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.10" +version = "0.20.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" +checksum = "1b80e3dec595989ea8510028f30c408a4630db12c9cbb8de34203b89d6577e99" dependencies = [ "log", - "ring", + "ring 0.16.20", + "sct", + "webpki", +] + +[[package]] +name = "rustls" +version = "0.21.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1feddffcfcc0b33f5c6ce9a29e341e4cd59c3f78e7ee45f4a40c038b1d6cbb" +dependencies = [ + "log", + "ring 0.16.20", "rustls-webpki", "sct", ] @@ -4385,8 +4410,8 @@ version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring", - "untrusted", + "ring 0.17.3", + "untrusted 0.9.0", ] [[package]] @@ -4428,12 +4453,12 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "sct" -version = "0.7.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" dependencies = [ - "ring", - "untrusted", + "ring 0.16.20", + "untrusted 0.7.1", ] [[package]] @@ -4696,6 +4721,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + [[package]] name = "spin" version = "0.9.8" @@ -5049,13 +5080,24 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "tokio-rustls" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" +dependencies = [ + "rustls 0.20.9", + "tokio", + "webpki", +] + [[package]] name = "tokio-rustls" version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls", + "rustls 0.21.6", "tokio", ] @@ -5324,6 +5366,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" +[[package]] +name = "untrusted" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" + [[package]] name = "untrusted" version = "0.9.0" @@ -5340,13 +5388,13 @@ dependencies = [ "flate2", "log", "once_cell", - "rustls", + "rustls 0.21.6", "rustls-webpki", "serde", "serde_json", "socks", "url", - "webpki-roots", + "webpki-roots 0.25.3", ] [[package]] @@ -5582,6 +5630,25 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07ecc0cd7cac091bf682ec5efa18b1cff79d617b84181f38b3951dbe135f607f" +dependencies = [ + "ring 0.16.20", + "untrusted 0.7.1", +] + +[[package]] +name = "webpki-roots" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87" +dependencies = [ + "webpki", +] + [[package]] name = "webpki-roots" version = "0.25.3" @@ -5876,7 +5943,7 @@ dependencies = [ [[package]] name = "xtask" -version = "1.8.0" +version = "1.7.1" dependencies = [ "anyhow", "build-info", diff --git a/Cargo.toml b/Cargo.toml index 1d0e0ca0d..5337ec5c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ members = [ ] [workspace.package] -version = "1.8.0" +version = "1.7.1" authors = [ "Quentin de Quelen ", "Clément Renault ", diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 7709d33d7..b5460fb56 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -11,7 +11,7 @@ edition.workspace = true license.workspace = true [dependencies] -actix-web = { version = "4.5.1", default-features = false } +actix-web = { version = "4.4.1", default-features = false } anyhow = "1.0.79" convert_case = "0.6.0" csv = "1.3.0" diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 04b919904..b65c466ca 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -14,18 +14,18 @@ default-run = "meilisearch" [dependencies] actix-cors = "0.7.0" -actix-http = { version = "3.6.0", default-features = false, features = [ +actix-http = { version = "3.5.1", default-features = false, features = [ "compress-brotli", "compress-gzip", - "rustls-0_21", + "rustls", ] } actix-utils = "3.0.1" -actix-web = { version = "4.5.1", default-features = false, features = [ +actix-web = { version = "4.4.1", default-features = false, features = [ "macros", "compress-brotli", "compress-gzip", "cookies", - "rustls-0_21", + "rustls", ] } actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true } anyhow = { version = "1.0.79", features = ["backtrace"] } @@ -52,7 +52,7 @@ index-scheduler = { path = "../index-scheduler" } indexmap = { version = "2.1.0", features = ["serde"] } is-terminal = "0.4.10" itertools = "0.11.0" -jsonwebtoken = "9.2.0" +jsonwebtoken = "8.3.0" lazy_static = "1.4.0" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } @@ -75,7 +75,7 @@ reqwest = { version = "0.11.23", features = [ "rustls-tls", "json", ], default-features = false } -rustls = "0.21.6" +rustls = "0.20.8" rustls-pemfile = "1.0.2" segment = { version = "0.2.3", optional = true } serde = { version = "1.0.195", features = ["derive"] } diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index af02f58e1..3451325b2 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -151,7 +151,7 @@ async fn run_http( .keep_alive(KeepAlive::Os); if let Some(config) = opt_clone.get_ssl_config()? { - http_server.bind_rustls_021(opt_clone.http_addr, config)?.run().await?; + http_server.bind_rustls(opt_clone.http_addr, config)?.run().await?; } else { http_server.bind(&opt_clone.http_addr)?.run().await?; } diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 43bf2c62c..92d53fd32 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -564,11 +564,11 @@ impl Opt { } if self.ssl_require_auth { let verifier = AllowAnyAuthenticatedClient::new(client_auth_roots); - config.with_client_cert_verifier(Arc::from(verifier)) + config.with_client_cert_verifier(verifier) } else { let verifier = AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots); - config.with_client_cert_verifier(Arc::from(verifier)) + config.with_client_cert_verifier(verifier) } } None => config.with_no_client_auth(), diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index c782e78cb..c71d83279 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -604,7 +604,6 @@ fn embedder_analytics( EmbedderSource::OpenAi => sources.insert("openAi"), EmbedderSource::HuggingFace => sources.insert("huggingFace"), EmbedderSource::UserProvided => sources.insert("userProvided"), - EmbedderSource::Ollama => sources.insert("ollama"), }; } }; diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index e65192d16..27de36c6d 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -530,7 +530,7 @@ pub fn perform_search( // The attributes to retrieve are the ones explicitly marked as to retrieve (all by default), // but these attributes must be also be present // - in the fields_ids_map - // - in the displayed attributes + // - in the the displayed attributes let to_retrieve_ids: BTreeSet<_> = query .attributes_to_retrieve .as_ref() @@ -671,16 +671,27 @@ pub fn perform_search( let sort_facet_values_by = index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?; + let default_sort_facet_values_by = + sort_facet_values_by.get("*").copied().unwrap_or_default(); if fields.iter().all(|f| f != "*") { - let fields: Vec<_> = - fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect(); + let fields: Vec<_> = fields + .iter() + .map(|n| { + ( + n, + sort_facet_values_by + .get(n) + .copied() + .unwrap_or(default_sort_facet_values_by), + ) + }) + .collect(); facet_distribution.facets(fields); } - let distribution = facet_distribution .candidates(candidates) - .default_order_by(sort_facet_values_by.get("*")) + .default_order_by(default_sort_facet_values_by) .execute()?; let stats = facet_distribution.compute_stats()?; (Some(distribution), Some(stats)) diff --git a/meilisearch/tests/documents/add_documents.rs b/meilisearch/tests/documents/add_documents.rs index b1262fa2d..e6af85229 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ b/meilisearch/tests/documents/add_documents.rs @@ -1237,8 +1237,8 @@ async fn error_add_documents_missing_document_id() { } #[actix_rt::test] -#[should_panic] -async fn error_document_field_limit_reached_in_one_document() { +#[ignore] // // TODO: Fix in an other PR: this does not provoke any error. +async fn error_document_field_limit_reached() { let server = Server::new().await; let index = server.index("test"); @@ -1246,241 +1246,22 @@ async fn error_document_field_limit_reached_in_one_document() { let mut big_object = std::collections::HashMap::new(); big_object.insert("id".to_owned(), "wow"); - for i in 0..(u16::MAX as usize + 1) { + for i in 0..65535 { let key = i.to_string(); big_object.insert(key, "I am a text!"); } let documents = json!([big_object]); - let (response, code) = index.update_documents(documents, Some("id")).await; - snapshot!(code, @"500 Internal Server Error"); + let (_response, code) = index.update_documents(documents, Some("id")).await; + snapshot!(code, @"202"); - let response = index.wait_task(response.uid()).await; - snapshot!(code, @"202 Accepted"); + index.wait_task(0).await; + let (response, code) = index.get_task(0).await; + snapshot!(code, @"200"); // Documents without a primary key are not accepted. - snapshot!(response, - @r###" - { - "uid": 1, - "indexUid": "test", - "status": "succeeded", - "type": "documentAdditionOrUpdate", - "canceledBy": null, - "details": { - "receivedDocuments": 1, - "indexedDocuments": 1 - }, - "error": null, - "duration": "[duration]", - "enqueuedAt": "[date]", - "startedAt": "[date]", - "finishedAt": "[date]" - } - "###); -} - -#[actix_rt::test] -async fn error_document_field_limit_reached_over_multiple_documents() { - let server = Server::new().await; - let index = server.index("test"); - - index.create(Some("id")).await; - - let mut big_object = std::collections::HashMap::new(); - big_object.insert("id".to_owned(), "wow"); - for i in 0..(u16::MAX / 2) { - let key = i.to_string(); - big_object.insert(key, "I am a text!"); - } - - let documents = json!([big_object]); - - let (response, code) = index.update_documents(documents, Some("id")).await; - snapshot!(code, @"202 Accepted"); - - let response = index.wait_task(response.uid()).await; - snapshot!(code, @"202 Accepted"); - snapshot!(response, - @r###" - { - "uid": 1, - "indexUid": "test", - "status": "succeeded", - "type": "documentAdditionOrUpdate", - "canceledBy": null, - "details": { - "receivedDocuments": 1, - "indexedDocuments": 1 - }, - "error": null, - "duration": "[duration]", - "enqueuedAt": "[date]", - "startedAt": "[date]", - "finishedAt": "[date]" - } - "###); - - let mut big_object = std::collections::HashMap::new(); - big_object.insert("id".to_owned(), "waw"); - for i in (u16::MAX as usize / 2)..(u16::MAX as usize + 1) { - let key = i.to_string(); - big_object.insert(key, "I am a text!"); - } - - let documents = json!([big_object]); - - let (response, code) = index.update_documents(documents, Some("id")).await; - snapshot!(code, @"202 Accepted"); - - let response = index.wait_task(response.uid()).await; - snapshot!(code, @"202 Accepted"); - snapshot!(response, - @r###" - { - "uid": 2, - "indexUid": "test", - "status": "failed", - "type": "documentAdditionOrUpdate", - "canceledBy": null, - "details": { - "receivedDocuments": 1, - "indexedDocuments": 0 - }, - "error": { - "message": "A document cannot contain more than 65,535 fields.", - "code": "max_fields_limit_exceeded", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#max_fields_limit_exceeded" - }, - "duration": "[duration]", - "enqueuedAt": "[date]", - "startedAt": "[date]", - "finishedAt": "[date]" - } - "###); -} - -#[actix_rt::test] -async fn error_document_field_limit_reached_in_one_nested_document() { - let server = Server::new().await; - let index = server.index("test"); - - index.create(Some("id")).await; - - let mut nested = std::collections::HashMap::new(); - for i in 0..(u16::MAX as usize + 1) { - let key = i.to_string(); - nested.insert(key, "I am a text!"); - } - let mut big_object = std::collections::HashMap::new(); - big_object.insert("id".to_owned(), "wow"); - - let documents = json!([big_object]); - - let (response, code) = index.update_documents(documents, Some("id")).await; - snapshot!(code, @"202 Accepted"); - - let response = index.wait_task(response.uid()).await; - snapshot!(code, @"202 Accepted"); - // Documents without a primary key are not accepted. - snapshot!(response, - @r###" - { - "uid": 1, - "indexUid": "test", - "status": "succeeded", - "type": "documentAdditionOrUpdate", - "canceledBy": null, - "details": { - "receivedDocuments": 1, - "indexedDocuments": 1 - }, - "error": null, - "duration": "[duration]", - "enqueuedAt": "[date]", - "startedAt": "[date]", - "finishedAt": "[date]" - } - "###); -} - -#[actix_rt::test] -async fn error_document_field_limit_reached_over_multiple_documents_with_nested_fields() { - let server = Server::new().await; - let index = server.index("test"); - - index.create(Some("id")).await; - - let mut nested = std::collections::HashMap::new(); - for i in 0..(u16::MAX / 2) { - let key = i.to_string(); - nested.insert(key, "I am a text!"); - } - let mut big_object = std::collections::HashMap::new(); - big_object.insert("id".to_owned(), "wow"); - - let documents = json!([big_object]); - - let (response, code) = index.update_documents(documents, Some("id")).await; - snapshot!(code, @"202 Accepted"); - - let response = index.wait_task(response.uid()).await; - snapshot!(code, @"202 Accepted"); - snapshot!(response, - @r###" - { - "uid": 1, - "indexUid": "test", - "status": "succeeded", - "type": "documentAdditionOrUpdate", - "canceledBy": null, - "details": { - "receivedDocuments": 1, - "indexedDocuments": 1 - }, - "error": null, - "duration": "[duration]", - "enqueuedAt": "[date]", - "startedAt": "[date]", - "finishedAt": "[date]" - } - "###); - - let mut nested = std::collections::HashMap::new(); - for i in 0..(u16::MAX / 2) { - let key = i.to_string(); - nested.insert(key, "I am a text!"); - } - let mut big_object = std::collections::HashMap::new(); - big_object.insert("id".to_owned(), "wow"); - - let documents = json!([big_object]); - - let (response, code) = index.update_documents(documents, Some("id")).await; - snapshot!(code, @"202 Accepted"); - - let response = index.wait_task(response.uid()).await; - snapshot!(code, @"202 Accepted"); - snapshot!(response, - @r###" - { - "uid": 2, - "indexUid": "test", - "status": "succeeded", - "type": "documentAdditionOrUpdate", - "canceledBy": null, - "details": { - "receivedDocuments": 1, - "indexedDocuments": 1 - }, - "error": null, - "duration": "[duration]", - "enqueuedAt": "[date]", - "startedAt": "[date]", - "finishedAt": "[date]" - } - "###); + snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), + @""); } #[actix_rt::test] diff --git a/meilisearch/tests/search/facet_search.rs b/meilisearch/tests/search/facet_search.rs index 12d2226a9..5f9f631f9 100644 --- a/meilisearch/tests/search/facet_search.rs +++ b/meilisearch/tests/search/facet_search.rs @@ -123,28 +123,6 @@ async fn simple_facet_search_with_max_values() { assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1); } -#[actix_rt::test] -async fn simple_facet_search_by_count_with_max_values() { - let server = Server::new().await; - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - index - .update_settings_faceting( - json!({ "maxValuesPerFacet": 1, "sortFacetValuesBy": { "*": "count" } }), - ) - .await; - index.update_settings_filterable_attributes(json!(["genres"])).await; - index.add_documents(documents, None).await; - index.wait_task(2).await; - - let (response, code) = - index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; - - assert_eq!(code, 200, "{}", response); - assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1); -} - #[actix_rt::test] async fn non_filterable_facet_search_error() { let server = Server::new().await; @@ -179,24 +157,3 @@ async fn facet_search_dont_support_words() { assert_eq!(code, 200, "{}", response); assert_eq!(response["facetHits"].as_array().unwrap().len(), 0); } - -#[actix_rt::test] -async fn simple_facet_search_with_sort_by_count() { - let server = Server::new().await; - let index = server.index("test"); - - let documents = DOCUMENTS.clone(); - index.update_settings_faceting(json!({ "sortFacetValuesBy": { "*": "count" } })).await; - index.update_settings_filterable_attributes(json!(["genres"])).await; - index.add_documents(documents, None).await; - index.wait_task(2).await; - - let (response, code) = - index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; - - assert_eq!(code, 200, "{}", response); - let hits = response["facetHits"].as_array().unwrap(); - assert_eq!(hits.len(), 2); - assert_eq!(hits[0], json!({ "value": "Action", "count": 3 })); - assert_eq!(hits[1], json!({ "value": "Adventure", "count": 2 })); -} diff --git a/milli/src/index.rs b/milli/src/index.rs index 2c3977403..6ad39dcb1 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -20,13 +20,13 @@ use crate::heed_codec::facet::{ use crate::heed_codec::{ BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec, }; -use crate::order_by_map::OrderByMap; use crate::proximity::ProximityPrecision; use crate::vector::EmbeddingConfig; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec, - Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, BEU64, + OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, + BEU32, BEU64, }; pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; @@ -1373,19 +1373,21 @@ impl Index { self.main.remap_key_type::().delete(txn, main_key::MAX_VALUES_PER_FACET) } - pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result { - let orders = self + pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result> { + let mut orders = self .main - .remap_types::>() + .remap_types::>>() .get(txn, main_key::SORT_FACET_VALUES_BY)? .unwrap_or_default(); + // Insert the default ordering if it is not already overwritten by the user. + orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic); Ok(orders) } pub(crate) fn put_sort_facet_values_by( &self, txn: &mut RwTxn, - val: &OrderByMap, + val: &HashMap, ) -> heed::Result<()> { self.main.remap_types::>().put(txn, main_key::SORT_FACET_VALUES_BY, &val) } diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 5effcea3d..f6b398304 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -16,7 +16,6 @@ pub mod facet; mod fields_ids_map; pub mod heed_codec; pub mod index; -pub mod order_by_map; pub mod prompt; pub mod proximity; pub mod score_details; @@ -57,10 +56,10 @@ pub use self::heed_codec::{ UncheckedU8StrStrCodec, }; pub use self::index::Index; -pub use self::search::facet::{FacetValueHit, SearchForFacetValues}; pub use self::search::{ - FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy, - Search, SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, + FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder, + MatchingWords, OrderBy, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy, + DEFAULT_VALUES_PER_FACET, }; pub type Result = std::result::Result; diff --git a/milli/src/order_by_map.rs b/milli/src/order_by_map.rs deleted file mode 100644 index 287e62c3a..000000000 --- a/milli/src/order_by_map.rs +++ /dev/null @@ -1,57 +0,0 @@ -use std::collections::{hash_map, HashMap}; -use std::iter::FromIterator; - -use serde::{Deserialize, Deserializer, Serialize}; - -use crate::OrderBy; - -#[derive(Serialize)] -pub struct OrderByMap(HashMap); - -impl OrderByMap { - pub fn get(&self, key: impl AsRef) -> OrderBy { - self.0 - .get(key.as_ref()) - .copied() - .unwrap_or_else(|| self.0.get("*").copied().unwrap_or_default()) - } - - pub fn insert(&mut self, key: String, value: OrderBy) -> Option { - self.0.insert(key, value) - } -} - -impl Default for OrderByMap { - fn default() -> Self { - let mut map = HashMap::new(); - map.insert("*".to_string(), OrderBy::Lexicographic); - OrderByMap(map) - } -} - -impl FromIterator<(String, OrderBy)> for OrderByMap { - fn from_iter>(iter: T) -> Self { - OrderByMap(iter.into_iter().collect()) - } -} - -impl IntoIterator for OrderByMap { - type Item = (String, OrderBy); - type IntoIter = hash_map::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } -} - -impl<'de> Deserialize<'de> for OrderByMap { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let mut map = Deserialize::deserialize(deserializer).map(OrderByMap)?; - // Insert the default ordering if it is not already overwritten by the user. - map.0.entry("*".to_string()).or_insert(OrderBy::default()); - Ok(map) - } -} diff --git a/milli/src/search/facet/facet_range_search.rs b/milli/src/search/facet/facet_range_search.rs index e340fbac5..f1a26ded5 100644 --- a/milli/src/search/facet/facet_range_search.rs +++ b/milli/src/search/facet/facet_range_search.rs @@ -168,7 +168,7 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { } // should we stop? - // We should if the search range doesn't include any + // We should if the the search range doesn't include any // element from the previous key or its successors let should_stop = { match self.right { @@ -232,7 +232,7 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { } // should we stop? - // We should if the search range doesn't include any + // We should if the the search range doesn't include any // element from the previous key or its successors let should_stop = { match self.right { diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index 34a9cdcb8..f676ee109 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -6,18 +6,15 @@ use roaring::RoaringBitmap; pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::filter::{BadGeoError, Filter}; -pub use self::search::{FacetValueHit, SearchForFacetValues}; use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec}; use crate::heed_codec::BytesRefCodec; use crate::{Index, Result}; - mod facet_distribution; mod facet_distribution_iter; mod facet_range_search; mod facet_sort_ascending; mod facet_sort_descending; mod filter; -mod search; fn facet_extreme_value<'t>( mut extreme_it: impl Iterator> + 't, diff --git a/milli/src/search/facet/search.rs b/milli/src/search/facet/search.rs deleted file mode 100644 index 0251d6b8d..000000000 --- a/milli/src/search/facet/search.rs +++ /dev/null @@ -1,326 +0,0 @@ -use std::cmp::{Ordering, Reverse}; -use std::collections::BinaryHeap; -use std::ops::ControlFlow; - -use charabia::normalizer::NormalizerOption; -use charabia::Normalize; -use fst::automaton::{Automaton, Str}; -use fst::{IntoStreamer, Streamer}; -use roaring::RoaringBitmap; -use tracing::error; - -use crate::error::UserError; -use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; -use crate::search::build_dfa; -use crate::{DocumentId, FieldId, OrderBy, Result, Search}; - -/// The maximum number of values per facet returned by the facet search route. -const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100; - -pub struct SearchForFacetValues<'a> { - query: Option, - facet: String, - search_query: Search<'a>, - max_values: usize, - is_hybrid: bool, -} - -impl<'a> SearchForFacetValues<'a> { - pub fn new( - facet: String, - search_query: Search<'a>, - is_hybrid: bool, - ) -> SearchForFacetValues<'a> { - SearchForFacetValues { - query: None, - facet, - search_query, - max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET, - is_hybrid, - } - } - - pub fn query(&mut self, query: impl Into) -> &mut Self { - self.query = Some(query.into()); - self - } - - pub fn max_values(&mut self, max: usize) -> &mut Self { - self.max_values = max; - self - } - - fn one_original_value_of( - &self, - field_id: FieldId, - facet_str: &str, - any_docid: DocumentId, - ) -> Result> { - let index = self.search_query.index; - let rtxn = self.search_query.rtxn; - let key: (FieldId, _, &str) = (field_id, any_docid, facet_str); - Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned())) - } - - pub fn execute(&self) -> Result> { - let index = self.search_query.index; - let rtxn = self.search_query.rtxn; - - let filterable_fields = index.filterable_fields(rtxn)?; - if !filterable_fields.contains(&self.facet) { - let (valid_fields, hidden_fields) = - index.remove_hidden_fields(rtxn, filterable_fields)?; - - return Err(UserError::InvalidFacetSearchFacetName { - field: self.facet.clone(), - valid_fields, - hidden_fields, - } - .into()); - } - - let fields_ids_map = index.fields_ids_map(rtxn)?; - let fid = match fields_ids_map.id(&self.facet) { - Some(fid) => fid, - // we return an empty list of results when the attribute has been - // set as filterable but no document contains this field (yet). - None => return Ok(Vec::new()), - }; - - let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? { - Some(fst) => fst, - None => return Ok(Vec::new()), - }; - - let search_candidates = self - .search_query - .execute_for_candidates(self.is_hybrid || self.search_query.vector.is_some())?; - - let mut results = match index.sort_facet_values_by(rtxn)?.get(&self.facet) { - OrderBy::Lexicographic => ValuesCollection::by_lexicographic(self.max_values), - OrderBy::Count => ValuesCollection::by_count(self.max_values), - }; - - match self.query.as_ref() { - Some(query) => { - let options = NormalizerOption { lossy: true, ..Default::default() }; - let query = query.normalize(&options); - let query = query.as_ref(); - - let authorize_typos = self.search_query.index.authorize_typos(rtxn)?; - let field_authorizes_typos = - !self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid); - - if authorize_typos && field_authorizes_typos { - let exact_words_fst = self.search_query.index.exact_words(rtxn)?; - if exact_words_fst.map_or(false, |fst| fst.contains(query)) { - if fst.contains(query) { - self.fetch_original_facets_using_normalized( - fid, - query, - query, - &search_candidates, - &mut results, - )?; - } - } else { - let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?; - let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?; - - let is_prefix = true; - let automaton = if query.len() < one_typo as usize { - build_dfa(query, 0, is_prefix) - } else if query.len() < two_typos as usize { - build_dfa(query, 1, is_prefix) - } else { - build_dfa(query, 2, is_prefix) - }; - - let mut stream = fst.search(automaton).into_stream(); - while let Some(facet_value) = stream.next() { - let value = std::str::from_utf8(facet_value)?; - if self - .fetch_original_facets_using_normalized( - fid, - value, - query, - &search_candidates, - &mut results, - )? - .is_break() - { - break; - } - } - } - } else { - let automaton = Str::new(query).starts_with(); - let mut stream = fst.search(automaton).into_stream(); - while let Some(facet_value) = stream.next() { - let value = std::str::from_utf8(facet_value)?; - if self - .fetch_original_facets_using_normalized( - fid, - value, - query, - &search_candidates, - &mut results, - )? - .is_break() - { - break; - } - } - } - } - None => { - let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" }; - for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { - let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = - result?; - let count = search_candidates.intersection_len(&bitmap); - if count != 0 { - let value = self - .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? - .unwrap_or_else(|| left_bound.to_string()); - if results.insert(FacetValueHit { value, count }).is_break() { - break; - } - } - } - } - } - - Ok(results.into_sorted_vec()) - } - - fn fetch_original_facets_using_normalized( - &self, - fid: FieldId, - value: &str, - query: &str, - search_candidates: &RoaringBitmap, - results: &mut ValuesCollection, - ) -> Result> { - let index = self.search_query.index; - let rtxn = self.search_query.rtxn; - - let database = index.facet_id_normalized_string_strings; - let key = (fid, value); - let original_strings = match database.get(rtxn, &key)? { - Some(original_strings) => original_strings, - None => { - error!("the facet value is missing from the facet database: {key:?}"); - return Ok(ControlFlow::Continue(())); - } - }; - for original in original_strings { - let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() }; - let docids = match index.facet_id_string_docids.get(rtxn, &key)? { - Some(FacetGroupValue { bitmap, .. }) => bitmap, - None => { - error!("the facet value is missing from the facet database: {key:?}"); - return Ok(ControlFlow::Continue(())); - } - }; - let count = search_candidates.intersection_len(&docids); - if count != 0 { - let value = self - .one_original_value_of(fid, &original, docids.min().unwrap())? - .unwrap_or_else(|| query.to_string()); - if results.insert(FacetValueHit { value, count }).is_break() { - break; - } - } - } - - Ok(ControlFlow::Continue(())) - } -} - -#[derive(Debug, Clone, serde::Serialize, PartialEq)] -pub struct FacetValueHit { - /// The original facet value - pub value: String, - /// The number of documents associated to this facet - pub count: u64, -} - -impl PartialOrd for FacetValueHit { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for FacetValueHit { - fn cmp(&self, other: &Self) -> Ordering { - self.count.cmp(&other.count).then_with(|| self.value.cmp(&other.value)) - } -} - -impl Eq for FacetValueHit {} - -/// A wrapper type that collects the best facet values by -/// lexicographic or number of associated values. -enum ValuesCollection { - /// Keeps the top values according to the lexicographic order. - Lexicographic { max: usize, content: Vec }, - /// Keeps the top values according to the number of values associated to them. - /// - /// Note that it is a max heap and we need to move the smallest counts - /// at the top to be able to pop them when we reach the max_values limit. - Count { max: usize, content: BinaryHeap> }, -} - -impl ValuesCollection { - pub fn by_lexicographic(max: usize) -> Self { - ValuesCollection::Lexicographic { max, content: Vec::new() } - } - - pub fn by_count(max: usize) -> Self { - ValuesCollection::Count { max, content: BinaryHeap::new() } - } - - pub fn insert(&mut self, value: FacetValueHit) -> ControlFlow<()> { - match self { - ValuesCollection::Lexicographic { max, content } => { - if content.len() < *max { - content.push(value); - if content.len() < *max { - return ControlFlow::Continue(()); - } - } - ControlFlow::Break(()) - } - ValuesCollection::Count { max, content } => { - if content.len() == *max { - // Peeking gives us the worst value in the list as - // this is a max-heap and we reversed it. - let Some(mut peek) = content.peek_mut() else { return ControlFlow::Break(()) }; - if peek.0.count <= value.count { - // Replace the current worst value in the heap - // with the new one we received that is better. - *peek = Reverse(value); - } - } else { - content.push(Reverse(value)); - } - ControlFlow::Continue(()) - } - } - } - - /// Returns the list of facet values in descending order of, either, - /// count or lexicographic order of the value depending on the type. - pub fn into_sorted_vec(self) -> Vec { - match self { - ValuesCollection::Lexicographic { content, .. } => content.into_iter().collect(), - ValuesCollection::Count { content, .. } => { - // Convert the heap into a vec of hits by removing the Reverse wrapper. - // Hits are already in the right order as they were reversed and there - // are output in ascending order. - content.into_sorted_vec().into_iter().map(|Reverse(hit)| hit).collect() - } - } - } -} diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index dc8354486..e411bd032 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -1,17 +1,25 @@ use std::fmt; +use std::ops::ControlFlow; +use charabia::normalizer::NormalizerOption; +use charabia::Normalize; +use fst::automaton::{Automaton, Str}; +use fst::{IntoStreamer, Streamer}; use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; +use tracing::error; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; use self::new::{execute_vector_search, PartialSearchResult}; +use crate::error::UserError; +use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::vector::DistributionShift; use crate::{ - execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, - SearchContext, + execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, + Result, SearchContext, }; // Building these factories is not free. @@ -19,6 +27,9 @@ static LEVDIST0: Lazy = Lazy::new(|| LevBuilder::new(0, true)); static LEVDIST1: Lazy = Lazy::new(|| LevBuilder::new(1, true)); static LEVDIST2: Lazy = Lazy::new(|| LevBuilder::new(2, true)); +/// The maximum number of values per facet returned by the facet search route. +const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100; + pub mod facet; mod fst_utils; pub mod hybrid; @@ -291,6 +302,240 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA { } } +pub struct SearchForFacetValues<'a> { + query: Option, + facet: String, + search_query: Search<'a>, + max_values: usize, + is_hybrid: bool, +} + +impl<'a> SearchForFacetValues<'a> { + pub fn new( + facet: String, + search_query: Search<'a>, + is_hybrid: bool, + ) -> SearchForFacetValues<'a> { + SearchForFacetValues { + query: None, + facet, + search_query, + max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET, + is_hybrid, + } + } + + pub fn query(&mut self, query: impl Into) -> &mut Self { + self.query = Some(query.into()); + self + } + + pub fn max_values(&mut self, max: usize) -> &mut Self { + self.max_values = max; + self + } + + fn one_original_value_of( + &self, + field_id: FieldId, + facet_str: &str, + any_docid: DocumentId, + ) -> Result> { + let index = self.search_query.index; + let rtxn = self.search_query.rtxn; + let key: (FieldId, _, &str) = (field_id, any_docid, facet_str); + Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned())) + } + + pub fn execute(&self) -> Result> { + let index = self.search_query.index; + let rtxn = self.search_query.rtxn; + + let filterable_fields = index.filterable_fields(rtxn)?; + if !filterable_fields.contains(&self.facet) { + let (valid_fields, hidden_fields) = + index.remove_hidden_fields(rtxn, filterable_fields)?; + + return Err(UserError::InvalidFacetSearchFacetName { + field: self.facet.clone(), + valid_fields, + hidden_fields, + } + .into()); + } + + let fields_ids_map = index.fields_ids_map(rtxn)?; + let fid = match fields_ids_map.id(&self.facet) { + Some(fid) => fid, + // we return an empty list of results when the attribute has been + // set as filterable but no document contains this field (yet). + None => return Ok(Vec::new()), + }; + + let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? { + Some(fst) => fst, + None => return Ok(vec![]), + }; + + let search_candidates = self + .search_query + .execute_for_candidates(self.is_hybrid || self.search_query.vector.is_some())?; + + match self.query.as_ref() { + Some(query) => { + let options = NormalizerOption { lossy: true, ..Default::default() }; + let query = query.normalize(&options); + let query = query.as_ref(); + + let authorize_typos = self.search_query.index.authorize_typos(rtxn)?; + let field_authorizes_typos = + !self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid); + + if authorize_typos && field_authorizes_typos { + let exact_words_fst = self.search_query.index.exact_words(rtxn)?; + if exact_words_fst.map_or(false, |fst| fst.contains(query)) { + let mut results = vec![]; + if fst.contains(query) { + self.fetch_original_facets_using_normalized( + fid, + query, + query, + &search_candidates, + &mut results, + )?; + } + Ok(results) + } else { + let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?; + let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?; + + let is_prefix = true; + let automaton = if query.len() < one_typo as usize { + build_dfa(query, 0, is_prefix) + } else if query.len() < two_typos as usize { + build_dfa(query, 1, is_prefix) + } else { + build_dfa(query, 2, is_prefix) + }; + + let mut stream = fst.search(automaton).into_stream(); + let mut results = vec![]; + while let Some(facet_value) = stream.next() { + let value = std::str::from_utf8(facet_value)?; + if self + .fetch_original_facets_using_normalized( + fid, + value, + query, + &search_candidates, + &mut results, + )? + .is_break() + { + break; + } + } + + Ok(results) + } + } else { + let automaton = Str::new(query).starts_with(); + let mut stream = fst.search(automaton).into_stream(); + let mut results = vec![]; + while let Some(facet_value) = stream.next() { + let value = std::str::from_utf8(facet_value)?; + if self + .fetch_original_facets_using_normalized( + fid, + value, + query, + &search_candidates, + &mut results, + )? + .is_break() + { + break; + } + } + + Ok(results) + } + } + None => { + let mut results = vec![]; + let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" }; + for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { + let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = + result?; + let count = search_candidates.intersection_len(&bitmap); + if count != 0 { + let value = self + .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? + .unwrap_or_else(|| left_bound.to_string()); + results.push(FacetValueHit { value, count }); + } + if results.len() >= self.max_values { + break; + } + } + Ok(results) + } + } + } + + fn fetch_original_facets_using_normalized( + &self, + fid: FieldId, + value: &str, + query: &str, + search_candidates: &RoaringBitmap, + results: &mut Vec, + ) -> Result> { + let index = self.search_query.index; + let rtxn = self.search_query.rtxn; + + let database = index.facet_id_normalized_string_strings; + let key = (fid, value); + let original_strings = match database.get(rtxn, &key)? { + Some(original_strings) => original_strings, + None => { + error!("the facet value is missing from the facet database: {key:?}"); + return Ok(ControlFlow::Continue(())); + } + }; + for original in original_strings { + let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() }; + let docids = match index.facet_id_string_docids.get(rtxn, &key)? { + Some(FacetGroupValue { bitmap, .. }) => bitmap, + None => { + error!("the facet value is missing from the facet database: {key:?}"); + return Ok(ControlFlow::Continue(())); + } + }; + let count = search_candidates.intersection_len(&docids); + if count != 0 { + let value = self + .one_original_value_of(fid, &original, docids.min().unwrap())? + .unwrap_or_else(|| query.to_string()); + results.push(FacetValueHit { value, count }); + } + if results.len() >= self.max_values { + return Ok(ControlFlow::Break(())); + } + } + + Ok(ControlFlow::Continue(())) + } +} + +#[derive(Debug, Clone, serde::Serialize, PartialEq)] +pub struct FacetValueHit { + /// The original facet value + pub value: String, + /// The number of documents associated to this facet + pub count: u64, +} + #[cfg(test)] mod test { #[allow(unused_imports)] diff --git a/milli/src/search/new/tests/typo_proximity.rs b/milli/src/search/new/tests/typo_proximity.rs index e71d32331..8dd110704 100644 --- a/milli/src/search/new/tests/typo_proximity.rs +++ b/milli/src/search/new/tests/typo_proximity.rs @@ -5,7 +5,7 @@ The typo ranking rule should transform the query graph such that it only contain the combinations of word derivations that it used to compute its bucket. The proximity ranking rule should then look for proximities only between those specific derivations. -For example, given the search query `beautiful summer` and the dataset: +For example, given the the search query `beautiful summer` and the dataset: ```text { "id": 0, "text": "beautigul summer...... beautiful day in the summer" } { "id": 1, "text": "beautiful summer" } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 46014202b..2f53718ac 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -14,13 +14,12 @@ use super::IndexerConfig; use crate::criterion::Criterion; use crate::error::UserError; use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; -use crate::order_by_map::OrderByMap; use crate::proximity::ProximityPrecision; use crate::update::index_documents::IndexDocumentsMethod; use crate::update::{IndexDocuments, UpdateIndexingStep}; use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings}; use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs}; -use crate::{FieldsIdsMap, Index, Result}; +use crate::{FieldsIdsMap, Index, OrderBy, Result}; #[derive(Debug, Clone, PartialEq, Eq, Copy)] pub enum Setting { @@ -146,7 +145,7 @@ pub struct Settings<'a, 't, 'i> { /// Attributes on which typo tolerance is disabled. exact_attributes: Setting>, max_values_per_facet: Setting, - sort_facet_values_by: Setting, + sort_facet_values_by: Setting>, pagination_max_total_hits: Setting, proximity_precision: Setting, embedder_settings: Setting>>, @@ -341,7 +340,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.max_values_per_facet = Setting::Reset; } - pub fn set_sort_facet_values_by(&mut self, value: OrderByMap) { + pub fn set_sort_facet_values_by(&mut self, value: HashMap) { self.sort_facet_values_by = Setting::Set(value); } @@ -1187,13 +1186,6 @@ pub fn validate_embedding_settings( } } } - EmbedderSource::Ollama => { - // Dimensions get inferred, only model name is required - check_unset(&dimensions, "dimensions", inferred_source, name)?; - check_set(&model, "model", inferred_source, name)?; - check_unset(&api_key, "apiKey", inferred_source, name)?; - check_unset(&revision, "revision", inferred_source, name)?; - } EmbedderSource::HuggingFace => { check_unset(&api_key, "apiKey", inferred_source, name)?; check_unset(&dimensions, "dimensions", inferred_source, name)?; diff --git a/milli/src/vector/error.rs b/milli/src/vector/error.rs index 9bbdeaa90..fbe4ee878 100644 --- a/milli/src/vector/error.rs +++ b/milli/src/vector/error.rs @@ -2,7 +2,6 @@ use std::path::PathBuf; use hf_hub::api::sync::ApiError; -use super::ollama::OllamaError; use crate::error::FaultSource; use crate::vector::openai::OpenAiError; @@ -72,17 +71,6 @@ pub enum EmbedErrorKind { OpenAiRuntimeInit(std::io::Error), #[error("initializing web client for sending embedding requests failed: {0}")] InitWebClient(reqwest::Error), - // Dedicated Ollama error kinds, might have to merge them into one cohesive error type for all backends. - #[error("unexpected response from Ollama: {0}")] - OllamaUnexpected(reqwest::Error), - #[error("sent too many requests to Ollama: {0}")] - OllamaTooManyRequests(OllamaError), - #[error("received internal error from Ollama: {0}")] - OllamaInternalServerError(OllamaError), - #[error("model not found. Meilisearch will not automatically download models from the Ollama library, please pull the model manually: {0}")] - OllamaModelNotFoundError(OllamaError), - #[error("received unhandled HTTP status code {0} from Ollama")] - OllamaUnhandledStatusCode(u16), } impl EmbedError { @@ -141,26 +129,6 @@ impl EmbedError { pub fn openai_initialize_web_client(inner: reqwest::Error) -> Self { Self { kind: EmbedErrorKind::InitWebClient(inner), fault: FaultSource::Runtime } } - - pub(crate) fn ollama_unexpected(inner: reqwest::Error) -> EmbedError { - Self { kind: EmbedErrorKind::OllamaUnexpected(inner), fault: FaultSource::Bug } - } - - pub(crate) fn ollama_model_not_found(inner: OllamaError) -> EmbedError { - Self { kind: EmbedErrorKind::OllamaModelNotFoundError(inner), fault: FaultSource::User } - } - - pub(crate) fn ollama_too_many_requests(inner: OllamaError) -> EmbedError { - Self { kind: EmbedErrorKind::OllamaTooManyRequests(inner), fault: FaultSource::Runtime } - } - - pub(crate) fn ollama_internal_server_error(inner: OllamaError) -> EmbedError { - Self { kind: EmbedErrorKind::OllamaInternalServerError(inner), fault: FaultSource::Runtime } - } - - pub(crate) fn ollama_unhandled_status_code(code: u16) -> EmbedError { - Self { kind: EmbedErrorKind::OllamaUnhandledStatusCode(code), fault: FaultSource::Bug } - } } #[derive(Debug, thiserror::Error)] @@ -227,13 +195,6 @@ impl NewEmbedderError { } } - pub fn ollama_could_not_determine_dimension(inner: EmbedError) -> NewEmbedderError { - Self { - kind: NewEmbedderErrorKind::CouldNotDetermineDimension(inner), - fault: FaultSource::User, - } - } - pub fn openai_invalid_api_key_format(inner: reqwest::header::InvalidHeaderValue) -> Self { Self { kind: NewEmbedderErrorKind::InvalidApiKeyFormat(inner), fault: FaultSource::User } } diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 035ac555e..6aa324da9 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -10,8 +10,6 @@ pub mod manual; pub mod openai; pub mod settings; -pub mod ollama; - pub use self::error::Error; pub type Embedding = Vec; @@ -78,7 +76,6 @@ pub enum Embedder { HuggingFace(hf::Embedder), OpenAi(openai::Embedder), UserProvided(manual::Embedder), - Ollama(ollama::Embedder), } #[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)] @@ -130,7 +127,6 @@ impl IntoIterator for EmbeddingConfigs { pub enum EmbedderOptions { HuggingFace(hf::EmbedderOptions), OpenAi(openai::EmbedderOptions), - Ollama(ollama::EmbedderOptions), UserProvided(manual::EmbedderOptions), } @@ -148,10 +144,6 @@ impl EmbedderOptions { pub fn openai(api_key: Option) -> Self { Self::OpenAi(openai::EmbedderOptions::with_default_model(api_key)) } - - pub fn ollama() -> Self { - Self::Ollama(ollama::EmbedderOptions::with_default_model()) - } } impl Embedder { @@ -159,7 +151,6 @@ impl Embedder { Ok(match options { EmbedderOptions::HuggingFace(options) => Self::HuggingFace(hf::Embedder::new(options)?), EmbedderOptions::OpenAi(options) => Self::OpenAi(openai::Embedder::new(options)?), - EmbedderOptions::Ollama(options) => Self::Ollama(ollama::Embedder::new(options)?), EmbedderOptions::UserProvided(options) => { Self::UserProvided(manual::Embedder::new(options)) } @@ -176,10 +167,6 @@ impl Embedder { let client = embedder.new_client()?; embedder.embed(texts, &client).await } - Embedder::Ollama(embedder) => { - let client = embedder.new_client()?; - embedder.embed(texts, &client).await - } Embedder::UserProvided(embedder) => embedder.embed(texts), } } @@ -194,7 +181,6 @@ impl Embedder { match self { Embedder::HuggingFace(embedder) => embedder.embed_chunks(text_chunks), Embedder::OpenAi(embedder) => embedder.embed_chunks(text_chunks), - Embedder::Ollama(embedder) => embedder.embed_chunks(text_chunks), Embedder::UserProvided(embedder) => embedder.embed_chunks(text_chunks), } } @@ -203,7 +189,6 @@ impl Embedder { match self { Embedder::HuggingFace(embedder) => embedder.chunk_count_hint(), Embedder::OpenAi(embedder) => embedder.chunk_count_hint(), - Embedder::Ollama(embedder) => embedder.chunk_count_hint(), Embedder::UserProvided(_) => 1, } } @@ -212,7 +197,6 @@ impl Embedder { match self { Embedder::HuggingFace(embedder) => embedder.prompt_count_in_chunk_hint(), Embedder::OpenAi(embedder) => embedder.prompt_count_in_chunk_hint(), - Embedder::Ollama(embedder) => embedder.prompt_count_in_chunk_hint(), Embedder::UserProvided(_) => 1, } } @@ -221,7 +205,6 @@ impl Embedder { match self { Embedder::HuggingFace(embedder) => embedder.dimensions(), Embedder::OpenAi(embedder) => embedder.dimensions(), - Embedder::Ollama(embedder) => embedder.dimensions(), Embedder::UserProvided(embedder) => embedder.dimensions(), } } @@ -230,7 +213,6 @@ impl Embedder { match self { Embedder::HuggingFace(embedder) => embedder.distribution(), Embedder::OpenAi(embedder) => embedder.distribution(), - Embedder::Ollama(embedder) => embedder.distribution(), Embedder::UserProvided(_embedder) => None, } } diff --git a/milli/src/vector/ollama.rs b/milli/src/vector/ollama.rs deleted file mode 100644 index 76988f70b..000000000 --- a/milli/src/vector/ollama.rs +++ /dev/null @@ -1,307 +0,0 @@ -// Copied from "openai.rs" with the sections I actually understand changed for Ollama. -// The common components of the Ollama and OpenAI interfaces might need to be extracted. - -use std::fmt::Display; - -use reqwest::StatusCode; - -use super::error::{EmbedError, NewEmbedderError}; -use super::openai::Retry; -use super::{DistributionShift, Embedding, Embeddings}; - -#[derive(Debug)] -pub struct Embedder { - headers: reqwest::header::HeaderMap, - options: EmbedderOptions, -} - -#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] -pub struct EmbedderOptions { - pub embedding_model: EmbeddingModel, -} - -#[derive( - Debug, Clone, Hash, PartialEq, Eq, serde::Serialize, serde::Deserialize, deserr::Deserr, -)] -#[deserr(deny_unknown_fields)] -pub struct EmbeddingModel { - name: String, - dimensions: usize, -} - -#[derive(Debug, serde::Serialize)] -struct OllamaRequest<'a> { - model: &'a str, - prompt: &'a str, -} - -#[derive(Debug, serde::Deserialize)] -struct OllamaResponse { - embedding: Embedding, -} - -#[derive(Debug, serde::Deserialize)] -pub struct OllamaError { - error: String, -} - -impl EmbeddingModel { - pub fn max_token(&self) -> usize { - // this might not be the same for all models - 8192 - } - - pub fn default_dimensions(&self) -> usize { - // Dimensions for nomic-embed-text - 768 - } - - pub fn name(&self) -> String { - self.name.clone() - } - - pub fn from_name(name: &str) -> Self { - Self { name: name.to_string(), dimensions: 0 } - } - - pub fn supports_overriding_dimensions(&self) -> bool { - false - } -} - -impl Default for EmbeddingModel { - fn default() -> Self { - Self { name: "nomic-embed-text".to_string(), dimensions: 0 } - } -} - -impl EmbedderOptions { - pub fn with_default_model() -> Self { - Self { embedding_model: Default::default() } - } - - pub fn with_embedding_model(embedding_model: EmbeddingModel) -> Self { - Self { embedding_model } - } -} - -impl Embedder { - pub fn new_client(&self) -> Result { - reqwest::ClientBuilder::new() - .default_headers(self.headers.clone()) - .build() - .map_err(EmbedError::openai_initialize_web_client) - } - - pub fn new(options: EmbedderOptions) -> Result { - let mut headers = reqwest::header::HeaderMap::new(); - headers.insert( - reqwest::header::CONTENT_TYPE, - reqwest::header::HeaderValue::from_static("application/json"), - ); - - let mut embedder = Self { options, headers }; - - let rt = tokio::runtime::Builder::new_current_thread() - .enable_io() - .enable_time() - .build() - .map_err(EmbedError::openai_runtime_init) - .map_err(NewEmbedderError::ollama_could_not_determine_dimension)?; - - // Get dimensions from Ollama - let request = - OllamaRequest { model: &embedder.options.embedding_model.name(), prompt: "test" }; - // TODO: Refactor into shared error type - let client = embedder - .new_client() - .map_err(NewEmbedderError::ollama_could_not_determine_dimension)?; - - rt.block_on(async move { - let response = client - .post(get_ollama_path()) - .json(&request) - .send() - .await - .map_err(EmbedError::ollama_unexpected) - .map_err(NewEmbedderError::ollama_could_not_determine_dimension)?; - - // Process error in case model not found - let response = Self::check_response(response).await.map_err(|_err| { - let e = EmbedError::ollama_model_not_found(OllamaError { - error: format!("model: {}", embedder.options.embedding_model.name()), - }); - NewEmbedderError::ollama_could_not_determine_dimension(e) - })?; - - let response: OllamaResponse = response - .json() - .await - .map_err(EmbedError::ollama_unexpected) - .map_err(NewEmbedderError::ollama_could_not_determine_dimension)?; - - let embedding = Embeddings::from_single_embedding(response.embedding); - - embedder.options.embedding_model.dimensions = embedding.dimension(); - - tracing::info!( - "ollama model {} with dimensionality {} added", - embedder.options.embedding_model.name(), - embedding.dimension() - ); - - Ok(embedder) - }) - } - - async fn check_response(response: reqwest::Response) -> Result { - if !response.status().is_success() { - // Not the same number of possible error cases covered as with OpenAI. - match response.status() { - StatusCode::TOO_MANY_REQUESTS => { - let error_response: OllamaError = response - .json() - .await - .map_err(EmbedError::ollama_unexpected) - .map_err(Retry::retry_later)?; - - return Err(Retry::rate_limited(EmbedError::ollama_too_many_requests( - OllamaError { error: error_response.error }, - ))); - } - StatusCode::SERVICE_UNAVAILABLE => { - let error_response: OllamaError = response - .json() - .await - .map_err(EmbedError::ollama_unexpected) - .map_err(Retry::retry_later)?; - return Err(Retry::retry_later(EmbedError::ollama_internal_server_error( - OllamaError { error: error_response.error }, - ))); - } - StatusCode::NOT_FOUND => { - let error_response: OllamaError = response - .json() - .await - .map_err(EmbedError::ollama_unexpected) - .map_err(Retry::give_up)?; - - return Err(Retry::give_up(EmbedError::ollama_model_not_found(OllamaError { - error: error_response.error, - }))); - } - code => { - return Err(Retry::give_up(EmbedError::ollama_unhandled_status_code( - code.as_u16(), - ))); - } - } - } - Ok(response) - } - - pub async fn embed( - &self, - texts: Vec, - client: &reqwest::Client, - ) -> Result>, EmbedError> { - // Ollama only embedds one document at a time. - let mut results = Vec::with_capacity(texts.len()); - - // The retry loop is inside the texts loop, might have to switch that around - for text in texts { - // Retries copied from openai.rs - for attempt in 0..7 { - let retry_duration = match self.try_embed(&text, client).await { - Ok(result) => { - results.push(result); - break; - } - Err(retry) => { - tracing::warn!("Failed: {}", retry.error); - retry.into_duration(attempt) - } - }?; - tracing::warn!( - "Attempt #{}, retrying after {}ms.", - attempt, - retry_duration.as_millis() - ); - tokio::time::sleep(retry_duration).await; - } - } - - Ok(results) - } - - async fn try_embed( - &self, - text: &str, - client: &reqwest::Client, - ) -> Result, Retry> { - let request = OllamaRequest { model: &self.options.embedding_model.name(), prompt: text }; - let response = client - .post(get_ollama_path()) - .json(&request) - .send() - .await - .map_err(EmbedError::openai_network) - .map_err(Retry::retry_later)?; - - let response = Self::check_response(response).await?; - - let response: OllamaResponse = response - .json() - .await - .map_err(EmbedError::openai_unexpected) - .map_err(Retry::retry_later)?; - - tracing::trace!("response: {:?}", response.embedding); - - let embedding = Embeddings::from_single_embedding(response.embedding); - Ok(embedding) - } - - pub fn embed_chunks( - &self, - text_chunks: Vec>, - ) -> Result>>, EmbedError> { - let rt = tokio::runtime::Builder::new_current_thread() - .enable_io() - .enable_time() - .build() - .map_err(EmbedError::openai_runtime_init)?; - let client = self.new_client()?; - rt.block_on(futures::future::try_join_all( - text_chunks.into_iter().map(|prompts| self.embed(prompts, &client)), - )) - } - - // Defaults copied from openai.rs - pub fn chunk_count_hint(&self) -> usize { - 10 - } - - pub fn prompt_count_in_chunk_hint(&self) -> usize { - 10 - } - - pub fn dimensions(&self) -> usize { - self.options.embedding_model.dimensions - } - - pub fn distribution(&self) -> Option { - None - } -} - -impl Display for OllamaError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.error) - } -} - -fn get_ollama_path() -> String { - // Important: Hostname not enough, has to be entire path to embeddings endpoint - std::env::var("MEILI_OLLAMA_URL").unwrap_or("http://localhost:11434/api/embeddings".to_string()) -} diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs index dcf3f4c89..33442dda4 100644 --- a/milli/src/vector/openai.rs +++ b/milli/src/vector/openai.rs @@ -419,12 +419,12 @@ impl Embedder { // retrying in case of failure -pub struct Retry { - pub error: EmbedError, +struct Retry { + error: EmbedError, strategy: RetryStrategy, } -pub enum RetryStrategy { +enum RetryStrategy { GiveUp, Retry, RetryTokenized, @@ -432,23 +432,23 @@ pub enum RetryStrategy { } impl Retry { - pub fn give_up(error: EmbedError) -> Self { + fn give_up(error: EmbedError) -> Self { Self { error, strategy: RetryStrategy::GiveUp } } - pub fn retry_later(error: EmbedError) -> Self { + fn retry_later(error: EmbedError) -> Self { Self { error, strategy: RetryStrategy::Retry } } - pub fn retry_tokenized(error: EmbedError) -> Self { + fn retry_tokenized(error: EmbedError) -> Self { Self { error, strategy: RetryStrategy::RetryTokenized } } - pub fn rate_limited(error: EmbedError) -> Self { + fn rate_limited(error: EmbedError) -> Self { Self { error, strategy: RetryStrategy::RetryAfterRateLimit } } - pub fn into_duration(self, attempt: u32) -> Result { + fn into_duration(self, attempt: u32) -> Result { match self.strategy { RetryStrategy::GiveUp => Err(self.error), RetryStrategy::Retry => Ok(tokio::time::Duration::from_millis((10u64).pow(attempt))), @@ -459,11 +459,11 @@ impl Retry { } } - pub fn must_tokenize(&self) -> bool { + fn must_tokenize(&self) -> bool { matches!(self.strategy, RetryStrategy::RetryTokenized) } - pub fn into_error(self) -> EmbedError { + fn into_error(self) -> EmbedError { self.error } } diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index 89571e98a..834a1c81d 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -1,7 +1,7 @@ use deserr::Deserr; use serde::{Deserialize, Serialize}; -use super::{ollama, openai}; +use super::openai; use crate::prompt::PromptData; use crate::update::Setting; use crate::vector::EmbeddingConfig; @@ -80,15 +80,11 @@ impl EmbeddingSettings { Self::SOURCE => { &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::UserProvided] } - Self::MODEL => { - &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama] - } + Self::MODEL => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi], Self::REVISION => &[EmbedderSource::HuggingFace], Self::API_KEY => &[EmbedderSource::OpenAi], Self::DIMENSIONS => &[EmbedderSource::OpenAi, EmbedderSource::UserProvided], - Self::DOCUMENT_TEMPLATE => { - &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama] - } + Self::DOCUMENT_TEMPLATE => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi], _other => unreachable!("unknown field"), } } @@ -105,7 +101,6 @@ impl EmbeddingSettings { EmbedderSource::HuggingFace => { &[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE] } - EmbedderSource::Ollama => &[Self::SOURCE, Self::MODEL, Self::DOCUMENT_TEMPLATE], EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS], } } @@ -139,7 +134,6 @@ pub enum EmbedderSource { #[default] OpenAi, HuggingFace, - Ollama, UserProvided, } @@ -149,7 +143,6 @@ impl std::fmt::Display for EmbedderSource { EmbedderSource::OpenAi => "openAi", EmbedderSource::HuggingFace => "huggingFace", EmbedderSource::UserProvided => "userProvided", - EmbedderSource::Ollama => "ollama", }; f.write_str(s) } @@ -202,14 +195,6 @@ impl From for EmbeddingSettings { dimensions: options.dimensions.map(Setting::Set).unwrap_or_default(), document_template: Setting::Set(prompt.template), }, - super::EmbedderOptions::Ollama(options) => Self { - source: Setting::Set(EmbedderSource::Ollama), - model: Setting::Set(options.embedding_model.name().to_owned()), - revision: Setting::NotSet, - api_key: Setting::NotSet, - dimensions: Setting::NotSet, - document_template: Setting::Set(prompt.template), - }, super::EmbedderOptions::UserProvided(options) => Self { source: Setting::Set(EmbedderSource::UserProvided), model: Setting::NotSet, @@ -244,14 +229,6 @@ impl From for EmbeddingConfig { } this.embedder_options = super::EmbedderOptions::OpenAi(options); } - EmbedderSource::Ollama => { - let mut options: ollama::EmbedderOptions = - super::ollama::EmbedderOptions::with_default_model(); - if let Some(model) = model.set() { - options.embedding_model = super::ollama::EmbeddingModel::from_name(&model); - } - this.embedder_options = super::EmbedderOptions::Ollama(options); - } EmbedderSource::HuggingFace => { let mut options = super::hf::EmbedderOptions::default(); if let Some(model) = model.set() { diff --git a/workloads/settings-add-remove-filters.json b/workloads/settings-add-remove-filters.json deleted file mode 100644 index 04a57c707..000000000 --- a/workloads/settings-add-remove-filters.json +++ /dev/null @@ -1,94 +0,0 @@ -{ - "name": "settings-add-remove-filters.json", - "run_count": 2, - "extra_cli_args": [ - "--max-indexing-threads=4" - ], - "assets": { - "150k-people.json": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/150k-people.json", - "sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b" - } - }, - "commands": [ - { - "route": "indexes/peoples/settings", - "method": "PATCH", - "body": { - "inline": { - "searchableAttributes": [ - "last_name", - "first_name", - "featured_job_organization_name", - "facebook_url", - "twitter_url", - "linkedin_url" - ], - "filterableAttributes": [ - "city", - "region", - "country_code" - ], - "dictionary": [ - "https://", - "http://", - "www.", - "crunchbase.com", - "facebook.com", - "twitter.com", - "linkedin.com" - ], - "stopWords": [ - "https://", - "http://", - "www.", - "crunchbase.com", - "facebook.com", - "twitter.com", - "linkedin.com" - ] - } - }, - "synchronous": "DontWait" - }, - { - "route": "indexes/peoples/documents", - "method": "POST", - "body": { - "asset": "150k-people.json" - }, - "synchronous": "WaitForTask" - }, - { - "route": "indexes/peoples/settings", - "method": "PATCH", - "body": { - "inline": { - "filterableAttributes": [ - "city", - "region", - "country_code", - "featured_job_title", - "featured_job_organization_name" - ] - } - }, - "synchronous": "WaitForTask" - }, - { - "route": "indexes/peoples/settings", - "method": "PATCH", - "body": { - "inline": { - "filterableAttributes": [ - "city", - "region", - "country_code" - ] - } - }, - "synchronous": "WaitForTask" - } - ] -} \ No newline at end of file diff --git a/workloads/settings-proximity-precision.json b/workloads/settings-proximity-precision.json deleted file mode 100644 index 48cfad49d..000000000 --- a/workloads/settings-proximity-precision.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "name": "settings-proximity-precision.json", - "run_count": 2, - "extra_cli_args": [ - "--max-indexing-threads=4" - ], - "assets": { - "150k-people.json": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/150k-people.json", - "sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b" - } - }, - "commands": [ - { - "route": "indexes/peoples/settings", - "method": "PATCH", - "body": { - "inline": { - "searchableAttributes": [ - "last_name", - "first_name", - "featured_job_organization_name", - "facebook_url", - "twitter_url", - "linkedin_url" - ], - "filterableAttributes": [ - "city", - "region", - "country_code", - "featured_job_title", - "featured_job_organization_name" - ], - "dictionary": [ - "https://", - "http://", - "www.", - "crunchbase.com", - "facebook.com", - "twitter.com", - "linkedin.com" - ], - "stopWords": [ - "https://", - "http://", - "www.", - "crunchbase.com", - "facebook.com", - "twitter.com", - "linkedin.com" - ] - } - }, - "synchronous": "DontWait" - }, - { - "route": "indexes/peoples/documents", - "method": "POST", - "body": { - "asset": "150k-people.json" - }, - "synchronous": "WaitForTask" - }, - { - "route": "indexes/peoples/settings", - "method": "PATCH", - "body": { - "inline": { - "proximityPrecision": "byAttribute" - } - }, - "synchronous": "WaitForTask" - }, - { - "route": "indexes/peoples/settings", - "method": "PATCH", - "body": { - "inline": { - "proximityPrecision": "byWord" - } - }, - "synchronous": "WaitForTask" - } - ] -} \ No newline at end of file diff --git a/workloads/settings-remove-add-swap-searchable.json b/workloads/settings-remove-add-swap-searchable.json deleted file mode 100644 index ba315680f..000000000 --- a/workloads/settings-remove-add-swap-searchable.json +++ /dev/null @@ -1,114 +0,0 @@ -{ - "name": "settings-remove-add-swap-searchable.json", - "run_count": 2, - "extra_cli_args": [ - "--max-indexing-threads=4" - ], - "assets": { - "150k-people.json": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/150k-people.json", - "sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b" - } - }, - "commands": [ - { - "route": "indexes/peoples/settings", - "method": "PATCH", - "body": { - "inline": { - "searchableAttributes": [ - "last_name", - "first_name", - "featured_job_organization_name", - "facebook_url", - "twitter_url", - "linkedin_url" - ], - "filterableAttributes": [ - "city", - "region", - "country_code", - "featured_job_title", - "featured_job_organization_name" - ], - "dictionary": [ - "https://", - "http://", - "www.", - "crunchbase.com", - "facebook.com", - "twitter.com", - "linkedin.com" - ], - "stopWords": [ - "https://", - "http://", - "www.", - "crunchbase.com", - "facebook.com", - "twitter.com", - "linkedin.com" - ] - } - }, - "synchronous": "DontWait" - }, - { - "route": "indexes/peoples/documents", - "method": "POST", - "body": { - "asset": "150k-people.json" - }, - "synchronous": "WaitForTask" - }, - { - "route": "indexes/peoples/settings", - "method": "PATCH", - "body": { - "inline": { - "searchableAttributes": [ - "last_name", - "first_name", - "featured_job_organization_name" - ] - } - }, - "synchronous": "WaitForTask" - }, - { - "route": "indexes/peoples/settings", - "method": "PATCH", - "body": { - "inline": { - "searchableAttributes": [ - "last_name", - "first_name", - "featured_job_organization_name", - "facebook_url", - "twitter_url", - "linkedin_url" - ] - } - }, - "synchronous": "WaitForTask" - }, - { - "route": "indexes/peoples/settings", - "method": "PATCH", - "body": { - "inline": { - "searchableAttributes": [ - "first_name", - "last_name", - "featured_job_organization_name", - "facebook_url", - "twitter_url", - "linkedin_url" - ] - } - }, - "synchronous": "WaitForTask" - } - ] -} \ No newline at end of file diff --git a/workloads/settings-typo.json b/workloads/settings-typo.json deleted file mode 100644 index a272e6d1f..000000000 --- a/workloads/settings-typo.json +++ /dev/null @@ -1,115 +0,0 @@ -{ - "name": "settings-typo.json", - "run_count": 2, - "extra_cli_args": [ - "--max-indexing-threads=4" - ], - "assets": { - "150k-people.json": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/150k-people.json", - "sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b" - } - }, - "commands": [ - { - "route": "indexes/peoples/settings", - "method": "PATCH", - "body": { - "inline": { - "searchableAttributes": [ - "last_name", - "first_name", - "featured_job_title", - "featured_job_organization_name", - "facebook_url", - "twitter_url", - "linkedin_url" - ], - "filterableAttributes": [ - "city", - "region", - "country_code", - "featured_job_title", - "featured_job_organization_name" - ], - "dictionary": [ - "https://", - "http://", - "www.", - "crunchbase.com", - "facebook.com", - "twitter.com", - "linkedin.com" - ], - "stopWords": [ - "https://", - "http://", - "www.", - "crunchbase.com", - "facebook.com", - "twitter.com", - "linkedin.com" - ] - } - }, - "synchronous": "DontWait" - }, - { - "route": "indexes/peoples/documents", - "method": "POST", - "body": { - "asset": "150k-people.json" - }, - "synchronous": "WaitForTask" - }, - { - "route": "indexes/peoples/settings", - "method": "PATCH", - "body": { - "inline": { - "typoTolerance": { - "disableOnAttributes": ["featured_job_organization_name"] - } - } - }, - "synchronous": "WaitForTask" - }, - { - "route": "indexes/peoples/settings", - "method": "PATCH", - "body": { - "inline": { - "typoTolerance": { - "disableOnAttributes": [] - } - } - }, - "synchronous": "WaitForTask" - }, - { - "route": "indexes/peoples/settings", - "method": "PATCH", - "body": { - "inline": { - "typoTolerance": { - "disableOnWords": ["Ben","Elowitz","Kevin","Flaherty", "Ron", "Dustin", "Owen", "Chris", "Mark", "Matt", "Peter", "Van", "Head", "of"] - } - } - }, - "synchronous": "WaitForTask" - }, - { - "route": "indexes/peoples/settings", - "method": "PATCH", - "body": { - "inline": { - "typoTolerance": { - "disableOnWords": [] - } - } - }, - "synchronous": "WaitForTask" - } - ] -} \ No newline at end of file diff --git a/xtask/src/bench/dashboard.rs b/xtask/src/bench/dashboard.rs index 3ba0ca58b..833426207 100644 --- a/xtask/src/bench/dashboard.rs +++ b/xtask/src/bench/dashboard.rs @@ -11,179 +11,157 @@ use super::client::Client; use super::env_info; use super::workload::Workload; -#[derive(Debug, Clone)] -pub enum DashboardClient { - Client(Client), - Dry, +pub async fn cancel_on_ctrl_c( + invocation_uuid: Uuid, + dashboard_client: Client, + abort_handle: AbortHandle, +) { + tracing::info!("press Ctrl-C to cancel the invocation"); + match ctrl_c().await { + Ok(()) => { + tracing::info!(%invocation_uuid, "received Ctrl-C, cancelling invocation"); + mark_as_failed(dashboard_client, invocation_uuid, None).await; + abort_handle.abort(); + } + Err(error) => tracing::warn!( + error = &error as &dyn std::error::Error, + "failed to listen to Ctrl-C signal, invocation won't be canceled on Ctrl-C" + ), + } } -impl DashboardClient { - pub fn new(dashboard_url: &str, api_key: Option<&str>) -> anyhow::Result { - let dashboard_client = Client::new( - Some(format!("{}/api/v1", dashboard_url)), - api_key, - Some(std::time::Duration::from_secs(60)), - )?; - - Ok(Self::Client(dashboard_client)) - } - - pub fn new_dry() -> Self { - Self::Dry - } - - pub async fn send_machine_info(&self, env: &env_info::Environment) -> anyhow::Result<()> { - let Self::Client(dashboard_client) = self else { return Ok(()) }; - - let response = dashboard_client - .put("machine") - .json(&json!({"hostname": env.hostname})) - .send() - .await - .context("sending machine information")?; - if !response.status().is_success() { - bail!( - "could not send machine information: {} {}", - response.status(), - response.text().await.unwrap_or_else(|_| "unknown".into()) - ); +pub async fn mark_as_failed( + dashboard_client: Client, + invocation_uuid: Uuid, + failure_reason: Option, +) { + let response = dashboard_client + .post("cancel-invocation") + .json(&json!({ + "invocation_uuid": invocation_uuid, + "failure_reason": failure_reason, + })) + .send() + .await; + let response = match response { + Ok(response) => response, + Err(response_error) => { + tracing::error!(error = &response_error as &dyn std::error::Error, %invocation_uuid, "could not mark invocation as failed"); + return; } - Ok(()) - } - - pub async fn create_invocation( - &self, - build_info: build_info::BuildInfo, - commit_message: &str, - env: env_info::Environment, - max_workloads: usize, - reason: Option<&str>, - ) -> anyhow::Result { - let Self::Client(dashboard_client) = self else { return Ok(Uuid::now_v7()) }; - - let response = dashboard_client - .put("invocation") - .json(&json!({ - "commit": { - "sha1": build_info.commit_sha1, - "message": commit_message, - "commit_date": build_info.commit_timestamp, - "branch": build_info.branch, - "tag": build_info.describe.and_then(|describe| describe.as_tag()), - }, - "machine_hostname": env.hostname, - "max_workloads": max_workloads, - "reason": reason - })) - .send() - .await - .context("sending invocation")?; - if !response.status().is_success() { - bail!( - "could not send new invocation: {}", - response.text().await.unwrap_or_else(|_| "unknown".into()) - ); - } - let invocation_uuid: Uuid = - response.json().await.context("could not deserialize invocation response as JSON")?; - Ok(invocation_uuid) - } - - pub async fn create_workload( - &self, - invocation_uuid: Uuid, - workload: &Workload, - ) -> anyhow::Result { - let Self::Client(dashboard_client) = self else { return Ok(Uuid::now_v7()) }; - - let response = dashboard_client - .put("workload") - .json(&json!({ - "invocation_uuid": invocation_uuid, - "name": &workload.name, - "max_runs": workload.run_count, - })) - .send() - .await - .context("could not create new workload")?; - - if !response.status().is_success() { - bail!("creating new workload failed: {}", response.text().await.unwrap()) - } - - let workload_uuid: Uuid = - response.json().await.context("could not deserialize JSON as UUID")?; - Ok(workload_uuid) - } - - pub async fn create_run( - &self, - workload_uuid: Uuid, - report: &BTreeMap, - ) -> anyhow::Result<()> { - let Self::Client(dashboard_client) = self else { return Ok(()) }; - - let response = dashboard_client - .put("run") - .json(&json!({ - "workload_uuid": workload_uuid, - "data": report - })) - .send() - .await - .context("sending new run")?; - if !response.status().is_success() { - bail!( - "sending new run failed: {}", - response.text().await.unwrap_or_else(|_| "unknown".into()) - ) - } - Ok(()) - } - - pub async fn cancel_on_ctrl_c(self, invocation_uuid: Uuid, abort_handle: AbortHandle) { - tracing::info!("press Ctrl-C to cancel the invocation"); - match ctrl_c().await { - Ok(()) => { - tracing::info!(%invocation_uuid, "received Ctrl-C, cancelling invocation"); - self.mark_as_failed(invocation_uuid, None).await; - abort_handle.abort(); - } - Err(error) => tracing::warn!( - error = &error as &dyn std::error::Error, - "failed to listen to Ctrl-C signal, invocation won't be canceled on Ctrl-C" - ), - } - } - - pub async fn mark_as_failed(&self, invocation_uuid: Uuid, failure_reason: Option) { - if let DashboardClient::Client(client) = self { - let response = client - .post("cancel-invocation") - .json(&json!({ - "invocation_uuid": invocation_uuid, - "failure_reason": failure_reason, - })) - .send() - .await; - let response = match response { - Ok(response) => response, - Err(response_error) => { - tracing::error!(error = &response_error as &dyn std::error::Error, %invocation_uuid, "could not mark invocation as failed"); - return; - } - }; - - if !response.status().is_success() { - tracing::error!( - %invocation_uuid, - "could not mark invocation as failed: {}", - response.text().await.unwrap() - ); - return; - } - } - - tracing::warn!(%invocation_uuid, "marked invocation as failed or canceled"); + }; + + if !response.status().is_success() { + tracing::error!( + %invocation_uuid, + "could not mark invocation as failed: {}", + response.text().await.unwrap() + ); + return; } + tracing::warn!(%invocation_uuid, "marked invocation as failed or canceled"); +} + +pub async fn send_machine_info( + dashboard_client: &Client, + env: &env_info::Environment, +) -> anyhow::Result<()> { + let response = dashboard_client + .put("machine") + .json(&json!({"hostname": env.hostname})) + .send() + .await + .context("sending machine information")?; + if !response.status().is_success() { + bail!( + "could not send machine information: {} {}", + response.status(), + response.text().await.unwrap_or_else(|_| "unknown".into()) + ); + } + Ok(()) +} + +pub async fn create_invocation( + dashboard_client: &Client, + build_info: build_info::BuildInfo, + commit_message: &str, + env: env_info::Environment, + max_workloads: usize, + reason: Option<&str>, +) -> anyhow::Result { + let response = dashboard_client + .put("invocation") + .json(&json!({ + "commit": { + "sha1": build_info.commit_sha1, + "message": commit_message, + "commit_date": build_info.commit_timestamp, + "branch": build_info.branch, + "tag": build_info.describe.and_then(|describe| describe.as_tag()), + }, + "machine_hostname": env.hostname, + "max_workloads": max_workloads, + "reason": reason + })) + .send() + .await + .context("sending invocation")?; + if !response.status().is_success() { + bail!( + "could not send new invocation: {}", + response.text().await.unwrap_or_else(|_| "unknown".into()) + ); + } + let invocation_uuid: Uuid = + response.json().await.context("could not deserialize invocation response as JSON")?; + Ok(invocation_uuid) +} + +pub async fn create_workload( + dashboard_client: &Client, + invocation_uuid: Uuid, + workload: &Workload, +) -> anyhow::Result { + let response = dashboard_client + .put("workload") + .json(&json!({ + "invocation_uuid": invocation_uuid, + "name": &workload.name, + "max_runs": workload.run_count, + })) + .send() + .await + .context("could not create new workload")?; + + if !response.status().is_success() { + bail!("creating new workload failed: {}", response.text().await.unwrap()) + } + + let workload_uuid: Uuid = + response.json().await.context("could not deserialize JSON as UUID")?; + Ok(workload_uuid) +} + +pub async fn create_run( + dashboard_client: Client, + workload_uuid: Uuid, + report: &BTreeMap, +) -> anyhow::Result<()> { + let response = dashboard_client + .put("run") + .json(&json!({ + "workload_uuid": workload_uuid, + "data": report + })) + .send() + .await + .context("sending new run")?; + if !response.status().is_success() { + bail!( + "sending new run failed: {}", + response.text().await.unwrap_or_else(|_| "unknown".into()) + ) + } + Ok(()) } diff --git a/xtask/src/bench/mod.rs b/xtask/src/bench/mod.rs index 844b64f63..62c11b604 100644 --- a/xtask/src/bench/mod.rs +++ b/xtask/src/bench/mod.rs @@ -50,10 +50,6 @@ pub struct BenchDeriveArgs { #[arg(long, default_value_t = default_dashboard_url())] dashboard_url: String, - /// Don't actually send results to the dashboard - #[arg(long)] - no_dashboard: bool, - /// Directory to output reports. #[arg(long, default_value_t = default_report_folder())] report_folder: String, @@ -107,11 +103,11 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { let assets_client = Client::new(None, args.assets_key.as_deref(), Some(std::time::Duration::from_secs(3600)))?; // 1h - let dashboard_client = if args.no_dashboard { - dashboard::DashboardClient::new_dry() - } else { - dashboard::DashboardClient::new(&args.dashboard_url, args.api_key.as_deref())? - }; + let dashboard_client = Client::new( + Some(format!("{}/api/v1", args.dashboard_url)), + args.api_key.as_deref(), + Some(std::time::Duration::from_secs(60)), + )?; // reporting uses its own client because keeping the stream open to wait for entries // blocks any other requests @@ -131,12 +127,12 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { // enter runtime rt.block_on(async { - dashboard_client.send_machine_info(&env).await?; + dashboard::send_machine_info(&dashboard_client, &env).await?; let commit_message = build_info.commit_msg.context("missing commit message")?.split('\n').next().unwrap(); let max_workloads = args.workload_file.len(); let reason: Option<&str> = args.reason.as_deref(); - let invocation_uuid = dashboard_client.create_invocation( build_info, commit_message, env, max_workloads, reason).await?; + let invocation_uuid = dashboard::create_invocation(&dashboard_client, build_info, commit_message, env, max_workloads, reason).await?; tracing::info!(workload_count = args.workload_file.len(), "handling workload files"); @@ -171,7 +167,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { let abort_handle = workload_runs.abort_handle(); tokio::spawn({ let dashboard_client = dashboard_client.clone(); - dashboard_client.cancel_on_ctrl_c(invocation_uuid, abort_handle) + dashboard::cancel_on_ctrl_c(invocation_uuid, dashboard_client, abort_handle) }); // wait for the end of the main task, handle result @@ -182,7 +178,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { } Ok(Err(error)) => { tracing::error!(%invocation_uuid, error = %error, "invocation failed, attempting to report the failure to dashboard"); - dashboard_client.mark_as_failed(invocation_uuid, Some(error.to_string())).await; + dashboard::mark_as_failed(dashboard_client, invocation_uuid, Some(error.to_string())).await; tracing::warn!(%invocation_uuid, "invocation marked as failed following error"); Err(error) }, @@ -190,7 +186,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { match join_error.try_into_panic() { Ok(panic) => { tracing::error!("invocation panicked, attempting to report the failure to dashboard"); - dashboard_client.mark_as_failed( invocation_uuid, Some("Panicked".into())).await; + dashboard::mark_as_failed(dashboard_client, invocation_uuid, Some("Panicked".into())).await; std::panic::resume_unwind(panic) } Err(_) => { diff --git a/xtask/src/bench/workload.rs b/xtask/src/bench/workload.rs index d82c5ad19..b3e952f29 100644 --- a/xtask/src/bench/workload.rs +++ b/xtask/src/bench/workload.rs @@ -12,9 +12,8 @@ use uuid::Uuid; use super::assets::Asset; use super::client::Client; use super::command::SyncMode; -use super::dashboard::DashboardClient; use super::BenchDeriveArgs; -use crate::bench::{assets, meili_process}; +use crate::bench::{assets, dashboard, meili_process}; #[derive(Deserialize)] pub struct Workload { @@ -26,7 +25,7 @@ pub struct Workload { } async fn run_commands( - dashboard_client: &DashboardClient, + dashboard_client: &Client, logs_client: &Client, meili_client: &Client, workload_uuid: Uuid, @@ -65,7 +64,7 @@ async fn run_commands( #[tracing::instrument(skip(assets_client, dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = workload.name))] pub async fn execute( assets_client: &Client, - dashboard_client: &DashboardClient, + dashboard_client: &Client, logs_client: &Client, meili_client: &Client, invocation_uuid: Uuid, @@ -75,7 +74,8 @@ pub async fn execute( ) -> anyhow::Result<()> { assets::fetch_assets(assets_client, &workload.assets, &args.asset_folder).await?; - let workload_uuid = dashboard_client.create_workload(invocation_uuid, &workload).await?; + let workload_uuid = + dashboard::create_workload(dashboard_client, invocation_uuid, &workload).await?; let mut tasks = Vec::new(); @@ -113,7 +113,7 @@ pub async fn execute( #[allow(clippy::too_many_arguments)] // not best code quality, but this is a benchmark runner #[tracing::instrument(skip(dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = %workload.name))] async fn execute_run( - dashboard_client: &DashboardClient, + dashboard_client: &Client, logs_client: &Client, meili_client: &Client, workload_uuid: Uuid, @@ -202,7 +202,7 @@ async fn start_report( } async fn stop_report( - dashboard_client: &DashboardClient, + dashboard_client: &Client, logs_client: &Client, workload_uuid: Uuid, filename: String, @@ -232,7 +232,7 @@ async fn stop_report( .context("could not convert trace to report")?; let context = || format!("writing report to {filename}"); - dashboard_client.create_run(workload_uuid, &report).await?; + dashboard::create_run(dashboard_client, workload_uuid, &report).await?; let mut output_file = std::io::BufWriter::new( std::fs::File::options()