mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-22 20:50:04 +01:00
Merge #4512
4512: Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" r=Kerollmops a=irevoire Reverts meilisearch/meilisearch#4510 This PR was supposed to be merged on `release-v1.7.1` not main 🤦 Co-authored-by: Tamo <irevoire@protonmail.ch>
This commit is contained in:
commit
5046ffdf54
2
.github/workflows/bench-pr.yml
vendored
2
.github/workflows/bench-pr.yml
vendored
@ -43,4 +43,4 @@ jobs:
|
||||
|
||||
- name: Run benchmarks on PR ${{ github.event.issue.id }}
|
||||
run: |
|
||||
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "[Comment](${{ github.event.comment.url }}) on [#${{github.event.issue.id}}](${{ github.event.issue.url }})" -- ${{ steps.command.outputs.command-arguments }}
|
||||
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "[Comment](${{ github.event.comment.html_url }}) on [#${{ github.event.issue.number }}](${{ github.event.issue.html_url }})" -- ${{ steps.command.outputs.command-arguments }}
|
19
.github/workflows/milestone-workflow.yml
vendored
19
.github/workflows/milestone-workflow.yml
vendored
@ -110,6 +110,25 @@ jobs:
|
||||
--milestone $MILESTONE_VERSION \
|
||||
--assignee curquiza
|
||||
|
||||
create-update-version-issue:
|
||||
needs: get-release-version
|
||||
# Create the changelog issue if the release is not only a patch release
|
||||
if: github.event.action == 'created'
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
ISSUE_TEMPLATE: issue-template.md
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Download the issue template
|
||||
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-version-issue.md > $ISSUE_TEMPLATE
|
||||
- name: Create the issue
|
||||
run: |
|
||||
gh issue create \
|
||||
--title "Update version in Cargo.toml for $MILESTONE_VERSION" \
|
||||
--label 'maintenance' \
|
||||
--body-file $ISSUE_TEMPLATE \
|
||||
--milestone $MILESTONE_VERSION
|
||||
|
||||
# ----------------
|
||||
# MILESTONE CLOSED
|
||||
# ----------------
|
||||
|
@ -4,7 +4,7 @@ First, thank you for contributing to Meilisearch! The goal of this document is t
|
||||
|
||||
Remember that there are many ways to contribute other than writing code: writing [tutorials or blog posts](https://github.com/meilisearch/awesome-meilisearch), improving [the documentation](https://github.com/meilisearch/documentation), submitting [bug reports](https://github.com/meilisearch/meilisearch/issues/new?assignees=&labels=&template=bug_report.md&title=) and [feature requests](https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal)...
|
||||
|
||||
The code in this repository is only concerned with managing multiple indexes, handling the update store, and exposing an HTTP API. Search and indexation are the domain of our core engine, [`milli`](https://github.com/meilisearch/milli), while tokenization is handled by [our `charabia` library](https://github.com/meilisearch/charabia/).
|
||||
Meilisearch can manage multiple indexes, handle the update store, and expose an HTTP API. Search and indexation are the domain of our core engine, [`milli`](https://github.com/meilisearch/meilisearch/tree/main/milli), while tokenization is handled by [our `charabia` library](https://github.com/meilisearch/charabia/).
|
||||
|
||||
If Meilisearch does not offer optimized support for your language, please consider contributing to `charabia` by following the [CONTRIBUTING.md file](https://github.com/meilisearch/charabia/blob/main/CONTRIBUTING.md) and integrating your intended normalizer/segmenter.
|
||||
|
||||
|
195
Cargo.lock
generated
195
Cargo.lock
generated
@ -36,9 +36,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "actix-http"
|
||||
version = "3.5.1"
|
||||
version = "3.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "129d4c88e98860e1758c5de288d1632b07970a16d59bdf7b8d66053d582bb71f"
|
||||
checksum = "d223b13fd481fc0d1f83bb12659ae774d9e3601814c68a0bc539731698cca743"
|
||||
dependencies = [
|
||||
"actix-codec",
|
||||
"actix-rt",
|
||||
@ -138,9 +138,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "actix-tls"
|
||||
version = "3.1.1"
|
||||
version = "3.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72616e7fbec0aa99c6f3164677fa48ff5a60036d0799c98cab894a44f3e0efc3"
|
||||
checksum = "d4cce60a2f2b477bc72e5cde0af1812a6e82d8fd85b5570a5dcf2a5bf2c5be5f"
|
||||
dependencies = [
|
||||
"actix-rt",
|
||||
"actix-service",
|
||||
@ -148,13 +148,11 @@ dependencies = [
|
||||
"futures-core",
|
||||
"impl-more",
|
||||
"pin-project-lite",
|
||||
"rustls 0.21.6",
|
||||
"rustls-webpki",
|
||||
"tokio",
|
||||
"tokio-rustls 0.23.4",
|
||||
"tokio-rustls",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"webpki-roots 0.22.6",
|
||||
"webpki-roots",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -169,9 +167,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "actix-web"
|
||||
version = "4.4.1"
|
||||
version = "4.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e43428f3bf11dee6d166b00ec2df4e3aa8cc1606aaa0b7433c146852e2f4e03b"
|
||||
checksum = "43a6556ddebb638c2358714d853257ed226ece6023ef9364f23f0c70737ea984"
|
||||
dependencies = [
|
||||
"actix-codec",
|
||||
"actix-http",
|
||||
@ -259,9 +257,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.8.8"
|
||||
version = "0.8.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff"
|
||||
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"getrandom",
|
||||
@ -496,7 +494,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
@ -630,7 +628,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "build-info"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"time",
|
||||
@ -835,9 +833,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.82"
|
||||
version = "1.0.83"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "305fe645edc1442a0fa8b6726ba61d422798d37a52e12eaecf4b022ebbb88f01"
|
||||
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
|
||||
dependencies = [
|
||||
"jobserver",
|
||||
"libc",
|
||||
@ -1531,7 +1529,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "dump"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@ -1769,7 +1767,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "file-store"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"faux",
|
||||
"tempfile",
|
||||
@ -1792,7 +1790,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "filter-parser"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"nom",
|
||||
@ -1812,7 +1810,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "flatten-serde-json"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@ -1930,7 +1928,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "fuzzers"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"clap",
|
||||
@ -2104,8 +2102,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"js-sys",
|
||||
"libc",
|
||||
"wasi",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2224,7 +2224,7 @@ dependencies = [
|
||||
"atomic-polyfill",
|
||||
"hash32",
|
||||
"rustc_version",
|
||||
"spin 0.9.8",
|
||||
"spin",
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
@ -2393,9 +2393,9 @@ dependencies = [
|
||||
"futures-util",
|
||||
"http 0.2.11",
|
||||
"hyper",
|
||||
"rustls 0.21.6",
|
||||
"rustls",
|
||||
"tokio",
|
||||
"tokio-rustls 0.24.1",
|
||||
"tokio-rustls",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2422,7 +2422,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
|
||||
|
||||
[[package]]
|
||||
name = "index-scheduler"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@ -2609,7 +2609,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "json-depth-checker"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@ -2617,13 +2617,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "jsonwebtoken"
|
||||
version = "8.3.0"
|
||||
version = "9.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378"
|
||||
checksum = "5c7ea04a7c5c055c175f189b6dc6ba036fd62306b58c66c9f6389036c503a3f4"
|
||||
dependencies = [
|
||||
"base64 0.21.7",
|
||||
"js-sys",
|
||||
"pem",
|
||||
"ring 0.16.20",
|
||||
"ring",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"simple_asn1",
|
||||
@ -3117,7 +3118,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
|
||||
|
||||
[[package]]
|
||||
name = "meili-snap"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"md5",
|
||||
@ -3126,7 +3127,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"actix-cors",
|
||||
"actix-http",
|
||||
@ -3184,7 +3185,7 @@ dependencies = [
|
||||
"rayon",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"rustls 0.20.9",
|
||||
"rustls",
|
||||
"rustls-pemfile",
|
||||
"segment",
|
||||
"serde",
|
||||
@ -3219,7 +3220,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-auth"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"base64 0.21.7",
|
||||
"enum-iterator",
|
||||
@ -3238,7 +3239,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-types"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"anyhow",
|
||||
@ -3268,7 +3269,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilitool"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
@ -3307,7 +3308,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "milli"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"arroy",
|
||||
"big_s",
|
||||
@ -3413,9 +3414,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "0.8.9"
|
||||
version = "0.8.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0"
|
||||
checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
@ -3733,11 +3734,12 @@ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
|
||||
|
||||
[[package]]
|
||||
name = "pem"
|
||||
version = "1.1.1"
|
||||
version = "3.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8835c273a76a90455d7344889b0964598e3316e2a79ede8e36f16bdcf2228b8"
|
||||
checksum = "1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310"
|
||||
dependencies = [
|
||||
"base64 0.13.1",
|
||||
"base64 0.21.7",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -3748,7 +3750,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
|
||||
|
||||
[[package]]
|
||||
name = "permissive-json-pointer"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"big_s",
|
||||
"serde_json",
|
||||
@ -4239,14 +4241,14 @@ dependencies = [
|
||||
"once_cell",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"rustls 0.21.6",
|
||||
"rustls",
|
||||
"rustls-pemfile",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
"system-configuration",
|
||||
"tokio",
|
||||
"tokio-rustls 0.24.1",
|
||||
"tokio-rustls",
|
||||
"tokio-util",
|
||||
"tower-service",
|
||||
"url",
|
||||
@ -4254,7 +4256,7 @@ dependencies = [
|
||||
"wasm-bindgen-futures",
|
||||
"wasm-streams",
|
||||
"web-sys",
|
||||
"webpki-roots 0.25.3",
|
||||
"webpki-roots",
|
||||
"winreg",
|
||||
]
|
||||
|
||||
@ -4272,30 +4274,15 @@ checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.16.20"
|
||||
version = "0.17.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"spin 0.5.2",
|
||||
"untrusted 0.7.1",
|
||||
"web-sys",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.17.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9babe80d5c16becf6594aa32ad2be8fe08498e7ae60b77de8df700e67f191d7e"
|
||||
checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"getrandom",
|
||||
"libc",
|
||||
"spin 0.9.8",
|
||||
"untrusted 0.9.0",
|
||||
"spin",
|
||||
"untrusted",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
@ -4373,24 +4360,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.20.9"
|
||||
version = "0.21.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b80e3dec595989ea8510028f30c408a4630db12c9cbb8de34203b89d6577e99"
|
||||
checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba"
|
||||
dependencies = [
|
||||
"log",
|
||||
"ring 0.16.20",
|
||||
"sct",
|
||||
"webpki",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.21.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d1feddffcfcc0b33f5c6ce9a29e341e4cd59c3f78e7ee45f4a40c038b1d6cbb"
|
||||
dependencies = [
|
||||
"log",
|
||||
"ring 0.16.20",
|
||||
"ring",
|
||||
"rustls-webpki",
|
||||
"sct",
|
||||
]
|
||||
@ -4410,8 +4385,8 @@ version = "0.101.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
|
||||
dependencies = [
|
||||
"ring 0.17.3",
|
||||
"untrusted 0.9.0",
|
||||
"ring",
|
||||
"untrusted",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -4453,12 +4428,12 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "sct"
|
||||
version = "0.7.0"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4"
|
||||
checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
|
||||
dependencies = [
|
||||
"ring 0.16.20",
|
||||
"untrusted 0.7.1",
|
||||
"ring",
|
||||
"untrusted",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -4721,12 +4696,6 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spin"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
|
||||
|
||||
[[package]]
|
||||
name = "spin"
|
||||
version = "0.9.8"
|
||||
@ -5080,24 +5049,13 @@ dependencies = [
|
||||
"syn 2.0.48",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-rustls"
|
||||
version = "0.23.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59"
|
||||
dependencies = [
|
||||
"rustls 0.20.9",
|
||||
"tokio",
|
||||
"webpki",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-rustls"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
|
||||
dependencies = [
|
||||
"rustls 0.21.6",
|
||||
"rustls",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
@ -5366,12 +5324,6 @@ version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
|
||||
|
||||
[[package]]
|
||||
name = "untrusted"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
|
||||
|
||||
[[package]]
|
||||
name = "untrusted"
|
||||
version = "0.9.0"
|
||||
@ -5388,13 +5340,13 @@ dependencies = [
|
||||
"flate2",
|
||||
"log",
|
||||
"once_cell",
|
||||
"rustls 0.21.6",
|
||||
"rustls",
|
||||
"rustls-webpki",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"socks",
|
||||
"url",
|
||||
"webpki-roots 0.25.3",
|
||||
"webpki-roots",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -5630,25 +5582,6 @@ dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "webpki"
|
||||
version = "0.22.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "07ecc0cd7cac091bf682ec5efa18b1cff79d617b84181f38b3951dbe135f607f"
|
||||
dependencies = [
|
||||
"ring 0.16.20",
|
||||
"untrusted 0.7.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "webpki-roots"
|
||||
version = "0.22.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87"
|
||||
dependencies = [
|
||||
"webpki",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "webpki-roots"
|
||||
version = "0.25.3"
|
||||
@ -5943,7 +5876,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "xtask"
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"build-info",
|
||||
|
@ -21,7 +21,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.7.1"
|
||||
version = "1.8.0"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
|
@ -11,7 +11,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
actix-web = { version = "4.4.1", default-features = false }
|
||||
actix-web = { version = "4.5.1", default-features = false }
|
||||
anyhow = "1.0.79"
|
||||
convert_case = "0.6.0"
|
||||
csv = "1.3.0"
|
||||
|
@ -14,18 +14,18 @@ default-run = "meilisearch"
|
||||
|
||||
[dependencies]
|
||||
actix-cors = "0.7.0"
|
||||
actix-http = { version = "3.5.1", default-features = false, features = [
|
||||
actix-http = { version = "3.6.0", default-features = false, features = [
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"rustls",
|
||||
"rustls-0_21",
|
||||
] }
|
||||
actix-utils = "3.0.1"
|
||||
actix-web = { version = "4.4.1", default-features = false, features = [
|
||||
actix-web = { version = "4.5.1", default-features = false, features = [
|
||||
"macros",
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"cookies",
|
||||
"rustls",
|
||||
"rustls-0_21",
|
||||
] }
|
||||
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
|
||||
anyhow = { version = "1.0.79", features = ["backtrace"] }
|
||||
@ -52,7 +52,7 @@ index-scheduler = { path = "../index-scheduler" }
|
||||
indexmap = { version = "2.1.0", features = ["serde"] }
|
||||
is-terminal = "0.4.10"
|
||||
itertools = "0.11.0"
|
||||
jsonwebtoken = "8.3.0"
|
||||
jsonwebtoken = "9.2.0"
|
||||
lazy_static = "1.4.0"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
@ -75,7 +75,7 @@ reqwest = { version = "0.11.23", features = [
|
||||
"rustls-tls",
|
||||
"json",
|
||||
], default-features = false }
|
||||
rustls = "0.20.8"
|
||||
rustls = "0.21.6"
|
||||
rustls-pemfile = "1.0.2"
|
||||
segment = { version = "0.2.3", optional = true }
|
||||
serde = { version = "1.0.195", features = ["derive"] }
|
||||
|
@ -151,7 +151,7 @@ async fn run_http(
|
||||
.keep_alive(KeepAlive::Os);
|
||||
|
||||
if let Some(config) = opt_clone.get_ssl_config()? {
|
||||
http_server.bind_rustls(opt_clone.http_addr, config)?.run().await?;
|
||||
http_server.bind_rustls_021(opt_clone.http_addr, config)?.run().await?;
|
||||
} else {
|
||||
http_server.bind(&opt_clone.http_addr)?.run().await?;
|
||||
}
|
||||
|
@ -564,11 +564,11 @@ impl Opt {
|
||||
}
|
||||
if self.ssl_require_auth {
|
||||
let verifier = AllowAnyAuthenticatedClient::new(client_auth_roots);
|
||||
config.with_client_cert_verifier(verifier)
|
||||
config.with_client_cert_verifier(Arc::from(verifier))
|
||||
} else {
|
||||
let verifier =
|
||||
AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots);
|
||||
config.with_client_cert_verifier(verifier)
|
||||
config.with_client_cert_verifier(Arc::from(verifier))
|
||||
}
|
||||
}
|
||||
None => config.with_no_client_auth(),
|
||||
|
@ -604,6 +604,7 @@ fn embedder_analytics(
|
||||
EmbedderSource::OpenAi => sources.insert("openAi"),
|
||||
EmbedderSource::HuggingFace => sources.insert("huggingFace"),
|
||||
EmbedderSource::UserProvided => sources.insert("userProvided"),
|
||||
EmbedderSource::Ollama => sources.insert("ollama"),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
@ -530,7 +530,7 @@ pub fn perform_search(
|
||||
// The attributes to retrieve are the ones explicitly marked as to retrieve (all by default),
|
||||
// but these attributes must be also be present
|
||||
// - in the fields_ids_map
|
||||
// - in the the displayed attributes
|
||||
// - in the displayed attributes
|
||||
let to_retrieve_ids: BTreeSet<_> = query
|
||||
.attributes_to_retrieve
|
||||
.as_ref()
|
||||
@ -671,27 +671,16 @@ pub fn perform_search(
|
||||
|
||||
let sort_facet_values_by =
|
||||
index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?;
|
||||
let default_sort_facet_values_by =
|
||||
sort_facet_values_by.get("*").copied().unwrap_or_default();
|
||||
|
||||
if fields.iter().all(|f| f != "*") {
|
||||
let fields: Vec<_> = fields
|
||||
.iter()
|
||||
.map(|n| {
|
||||
(
|
||||
n,
|
||||
sort_facet_values_by
|
||||
.get(n)
|
||||
.copied()
|
||||
.unwrap_or(default_sort_facet_values_by),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
let fields: Vec<_> =
|
||||
fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect();
|
||||
facet_distribution.facets(fields);
|
||||
}
|
||||
|
||||
let distribution = facet_distribution
|
||||
.candidates(candidates)
|
||||
.default_order_by(default_sort_facet_values_by)
|
||||
.default_order_by(sort_facet_values_by.get("*"))
|
||||
.execute()?;
|
||||
let stats = facet_distribution.compute_stats()?;
|
||||
(Some(distribution), Some(stats))
|
||||
|
@ -1237,8 +1237,8 @@ async fn error_add_documents_missing_document_id() {
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore] // // TODO: Fix in an other PR: this does not provoke any error.
|
||||
async fn error_document_field_limit_reached() {
|
||||
#[should_panic]
|
||||
async fn error_document_field_limit_reached_in_one_document() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
@ -1246,22 +1246,241 @@ async fn error_document_field_limit_reached() {
|
||||
|
||||
let mut big_object = std::collections::HashMap::new();
|
||||
big_object.insert("id".to_owned(), "wow");
|
||||
for i in 0..65535 {
|
||||
for i in 0..(u16::MAX as usize + 1) {
|
||||
let key = i.to_string();
|
||||
big_object.insert(key, "I am a text!");
|
||||
}
|
||||
|
||||
let documents = json!([big_object]);
|
||||
|
||||
let (_response, code) = index.update_documents(documents, Some("id")).await;
|
||||
snapshot!(code, @"202");
|
||||
let (response, code) = index.update_documents(documents, Some("id")).await;
|
||||
snapshot!(code, @"500 Internal Server Error");
|
||||
|
||||
index.wait_task(0).await;
|
||||
let (response, code) = index.get_task(0).await;
|
||||
snapshot!(code, @"200");
|
||||
let response = index.wait_task(response.uid()).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
// Documents without a primary key are not accepted.
|
||||
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
||||
@"");
|
||||
snapshot!(response,
|
||||
@r###"
|
||||
{
|
||||
"uid": 1,
|
||||
"indexUid": "test",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_document_field_limit_reached_over_multiple_documents() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index.create(Some("id")).await;
|
||||
|
||||
let mut big_object = std::collections::HashMap::new();
|
||||
big_object.insert("id".to_owned(), "wow");
|
||||
for i in 0..(u16::MAX / 2) {
|
||||
let key = i.to_string();
|
||||
big_object.insert(key, "I am a text!");
|
||||
}
|
||||
|
||||
let documents = json!([big_object]);
|
||||
|
||||
let (response, code) = index.update_documents(documents, Some("id")).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
|
||||
let response = index.wait_task(response.uid()).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
snapshot!(response,
|
||||
@r###"
|
||||
{
|
||||
"uid": 1,
|
||||
"indexUid": "test",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let mut big_object = std::collections::HashMap::new();
|
||||
big_object.insert("id".to_owned(), "waw");
|
||||
for i in (u16::MAX as usize / 2)..(u16::MAX as usize + 1) {
|
||||
let key = i.to_string();
|
||||
big_object.insert(key, "I am a text!");
|
||||
}
|
||||
|
||||
let documents = json!([big_object]);
|
||||
|
||||
let (response, code) = index.update_documents(documents, Some("id")).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
|
||||
let response = index.wait_task(response.uid()).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
snapshot!(response,
|
||||
@r###"
|
||||
{
|
||||
"uid": 2,
|
||||
"indexUid": "test",
|
||||
"status": "failed",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "A document cannot contain more than 65,535 fields.",
|
||||
"code": "max_fields_limit_exceeded",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#max_fields_limit_exceeded"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_document_field_limit_reached_in_one_nested_document() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index.create(Some("id")).await;
|
||||
|
||||
let mut nested = std::collections::HashMap::new();
|
||||
for i in 0..(u16::MAX as usize + 1) {
|
||||
let key = i.to_string();
|
||||
nested.insert(key, "I am a text!");
|
||||
}
|
||||
let mut big_object = std::collections::HashMap::new();
|
||||
big_object.insert("id".to_owned(), "wow");
|
||||
|
||||
let documents = json!([big_object]);
|
||||
|
||||
let (response, code) = index.update_documents(documents, Some("id")).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
|
||||
let response = index.wait_task(response.uid()).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
// Documents without a primary key are not accepted.
|
||||
snapshot!(response,
|
||||
@r###"
|
||||
{
|
||||
"uid": 1,
|
||||
"indexUid": "test",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_document_field_limit_reached_over_multiple_documents_with_nested_fields() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index.create(Some("id")).await;
|
||||
|
||||
let mut nested = std::collections::HashMap::new();
|
||||
for i in 0..(u16::MAX / 2) {
|
||||
let key = i.to_string();
|
||||
nested.insert(key, "I am a text!");
|
||||
}
|
||||
let mut big_object = std::collections::HashMap::new();
|
||||
big_object.insert("id".to_owned(), "wow");
|
||||
|
||||
let documents = json!([big_object]);
|
||||
|
||||
let (response, code) = index.update_documents(documents, Some("id")).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
|
||||
let response = index.wait_task(response.uid()).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
snapshot!(response,
|
||||
@r###"
|
||||
{
|
||||
"uid": 1,
|
||||
"indexUid": "test",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let mut nested = std::collections::HashMap::new();
|
||||
for i in 0..(u16::MAX / 2) {
|
||||
let key = i.to_string();
|
||||
nested.insert(key, "I am a text!");
|
||||
}
|
||||
let mut big_object = std::collections::HashMap::new();
|
||||
big_object.insert("id".to_owned(), "wow");
|
||||
|
||||
let documents = json!([big_object]);
|
||||
|
||||
let (response, code) = index.update_documents(documents, Some("id")).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
|
||||
let response = index.wait_task(response.uid()).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
snapshot!(response,
|
||||
@r###"
|
||||
{
|
||||
"uid": 2,
|
||||
"indexUid": "test",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
@ -123,6 +123,28 @@ async fn simple_facet_search_with_max_values() {
|
||||
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn simple_facet_search_by_count_with_max_values() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index
|
||||
.update_settings_faceting(
|
||||
json!({ "maxValuesPerFacet": 1, "sortFacetValuesBy": { "*": "count" } }),
|
||||
)
|
||||
.await;
|
||||
index.update_settings_filterable_attributes(json!(["genres"])).await;
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
let (response, code) =
|
||||
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
|
||||
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn non_filterable_facet_search_error() {
|
||||
let server = Server::new().await;
|
||||
@ -157,3 +179,24 @@ async fn facet_search_dont_support_words() {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
assert_eq!(response["facetHits"].as_array().unwrap().len(), 0);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn simple_facet_search_with_sort_by_count() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.update_settings_faceting(json!({ "sortFacetValuesBy": { "*": "count" } })).await;
|
||||
index.update_settings_filterable_attributes(json!(["genres"])).await;
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
let (response, code) =
|
||||
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
|
||||
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
let hits = response["facetHits"].as_array().unwrap();
|
||||
assert_eq!(hits.len(), 2);
|
||||
assert_eq!(hits[0], json!({ "value": "Action", "count": 3 }));
|
||||
assert_eq!(hits[1], json!({ "value": "Adventure", "count": 2 }));
|
||||
}
|
||||
|
@ -20,13 +20,13 @@ use crate::heed_codec::facet::{
|
||||
use crate::heed_codec::{
|
||||
BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec,
|
||||
};
|
||||
use crate::order_by_map::OrderByMap;
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::vector::EmbeddingConfig;
|
||||
use crate::{
|
||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
|
||||
OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16,
|
||||
BEU32, BEU64,
|
||||
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, BEU64,
|
||||
};
|
||||
|
||||
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
||||
@ -1373,21 +1373,19 @@ impl Index {
|
||||
self.main.remap_key_type::<Str>().delete(txn, main_key::MAX_VALUES_PER_FACET)
|
||||
}
|
||||
|
||||
pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<HashMap<String, OrderBy>> {
|
||||
let mut orders = self
|
||||
pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<OrderByMap> {
|
||||
let orders = self
|
||||
.main
|
||||
.remap_types::<Str, SerdeJson<HashMap<String, OrderBy>>>()
|
||||
.remap_types::<Str, SerdeJson<OrderByMap>>()
|
||||
.get(txn, main_key::SORT_FACET_VALUES_BY)?
|
||||
.unwrap_or_default();
|
||||
// Insert the default ordering if it is not already overwritten by the user.
|
||||
orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic);
|
||||
Ok(orders)
|
||||
}
|
||||
|
||||
pub(crate) fn put_sort_facet_values_by(
|
||||
&self,
|
||||
txn: &mut RwTxn,
|
||||
val: &HashMap<String, OrderBy>,
|
||||
val: &OrderByMap,
|
||||
) -> heed::Result<()> {
|
||||
self.main.remap_types::<Str, SerdeJson<_>>().put(txn, main_key::SORT_FACET_VALUES_BY, &val)
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ pub mod facet;
|
||||
mod fields_ids_map;
|
||||
pub mod heed_codec;
|
||||
pub mod index;
|
||||
pub mod order_by_map;
|
||||
pub mod prompt;
|
||||
pub mod proximity;
|
||||
pub mod score_details;
|
||||
@ -56,10 +57,10 @@ pub use self::heed_codec::{
|
||||
UncheckedU8StrStrCodec,
|
||||
};
|
||||
pub use self::index::Index;
|
||||
pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
|
||||
pub use self::search::{
|
||||
FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder,
|
||||
MatchingWords, OrderBy, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy,
|
||||
DEFAULT_VALUES_PER_FACET,
|
||||
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy,
|
||||
Search, SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
||||
};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||
|
57
milli/src/order_by_map.rs
Normal file
57
milli/src/order_by_map.rs
Normal file
@ -0,0 +1,57 @@
|
||||
use std::collections::{hash_map, HashMap};
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
|
||||
use crate::OrderBy;
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct OrderByMap(HashMap<String, OrderBy>);
|
||||
|
||||
impl OrderByMap {
|
||||
pub fn get(&self, key: impl AsRef<str>) -> OrderBy {
|
||||
self.0
|
||||
.get(key.as_ref())
|
||||
.copied()
|
||||
.unwrap_or_else(|| self.0.get("*").copied().unwrap_or_default())
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, key: String, value: OrderBy) -> Option<OrderBy> {
|
||||
self.0.insert(key, value)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for OrderByMap {
|
||||
fn default() -> Self {
|
||||
let mut map = HashMap::new();
|
||||
map.insert("*".to_string(), OrderBy::Lexicographic);
|
||||
OrderByMap(map)
|
||||
}
|
||||
}
|
||||
|
||||
impl FromIterator<(String, OrderBy)> for OrderByMap {
|
||||
fn from_iter<T: IntoIterator<Item = (String, OrderBy)>>(iter: T) -> Self {
|
||||
OrderByMap(iter.into_iter().collect())
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoIterator for OrderByMap {
|
||||
type Item = (String, OrderBy);
|
||||
type IntoIter = hash_map::IntoIter<String, OrderBy>;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.0.into_iter()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for OrderByMap {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
let mut map = Deserialize::deserialize(deserializer).map(OrderByMap)?;
|
||||
// Insert the default ordering if it is not already overwritten by the user.
|
||||
map.0.entry("*".to_string()).or_insert(OrderBy::default());
|
||||
Ok(map)
|
||||
}
|
||||
}
|
@ -168,7 +168,7 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
}
|
||||
|
||||
// should we stop?
|
||||
// We should if the the search range doesn't include any
|
||||
// We should if the search range doesn't include any
|
||||
// element from the previous key or its successors
|
||||
let should_stop = {
|
||||
match self.right {
|
||||
@ -232,7 +232,7 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
}
|
||||
|
||||
// should we stop?
|
||||
// We should if the the search range doesn't include any
|
||||
// We should if the search range doesn't include any
|
||||
// element from the previous key or its successors
|
||||
let should_stop = {
|
||||
match self.right {
|
||||
|
@ -6,15 +6,18 @@ use roaring::RoaringBitmap;
|
||||
|
||||
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
|
||||
pub use self::filter::{BadGeoError, Filter};
|
||||
pub use self::search::{FacetValueHit, SearchForFacetValues};
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::{Index, Result};
|
||||
|
||||
mod facet_distribution;
|
||||
mod facet_distribution_iter;
|
||||
mod facet_range_search;
|
||||
mod facet_sort_ascending;
|
||||
mod facet_sort_descending;
|
||||
mod filter;
|
||||
mod search;
|
||||
|
||||
fn facet_extreme_value<'t>(
|
||||
mut extreme_it: impl Iterator<Item = heed::Result<(RoaringBitmap, &'t [u8])>> + 't,
|
||||
|
326
milli/src/search/facet/search.rs
Normal file
326
milli/src/search/facet/search.rs
Normal file
@ -0,0 +1,326 @@
|
||||
use std::cmp::{Ordering, Reverse};
|
||||
use std::collections::BinaryHeap;
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use charabia::normalizer::NormalizerOption;
|
||||
use charabia::Normalize;
|
||||
use fst::automaton::{Automaton, Str};
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use roaring::RoaringBitmap;
|
||||
use tracing::error;
|
||||
|
||||
use crate::error::UserError;
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
|
||||
use crate::search::build_dfa;
|
||||
use crate::{DocumentId, FieldId, OrderBy, Result, Search};
|
||||
|
||||
/// The maximum number of values per facet returned by the facet search route.
|
||||
const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100;
|
||||
|
||||
pub struct SearchForFacetValues<'a> {
|
||||
query: Option<String>,
|
||||
facet: String,
|
||||
search_query: Search<'a>,
|
||||
max_values: usize,
|
||||
is_hybrid: bool,
|
||||
}
|
||||
|
||||
impl<'a> SearchForFacetValues<'a> {
|
||||
pub fn new(
|
||||
facet: String,
|
||||
search_query: Search<'a>,
|
||||
is_hybrid: bool,
|
||||
) -> SearchForFacetValues<'a> {
|
||||
SearchForFacetValues {
|
||||
query: None,
|
||||
facet,
|
||||
search_query,
|
||||
max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET,
|
||||
is_hybrid,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
|
||||
self.query = Some(query.into());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn max_values(&mut self, max: usize) -> &mut Self {
|
||||
self.max_values = max;
|
||||
self
|
||||
}
|
||||
|
||||
fn one_original_value_of(
|
||||
&self,
|
||||
field_id: FieldId,
|
||||
facet_str: &str,
|
||||
any_docid: DocumentId,
|
||||
) -> Result<Option<String>> {
|
||||
let index = self.search_query.index;
|
||||
let rtxn = self.search_query.rtxn;
|
||||
let key: (FieldId, _, &str) = (field_id, any_docid, facet_str);
|
||||
Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned()))
|
||||
}
|
||||
|
||||
pub fn execute(&self) -> Result<Vec<FacetValueHit>> {
|
||||
let index = self.search_query.index;
|
||||
let rtxn = self.search_query.rtxn;
|
||||
|
||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||
if !filterable_fields.contains(&self.facet) {
|
||||
let (valid_fields, hidden_fields) =
|
||||
index.remove_hidden_fields(rtxn, filterable_fields)?;
|
||||
|
||||
return Err(UserError::InvalidFacetSearchFacetName {
|
||||
field: self.facet.clone(),
|
||||
valid_fields,
|
||||
hidden_fields,
|
||||
}
|
||||
.into());
|
||||
}
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let fid = match fields_ids_map.id(&self.facet) {
|
||||
Some(fid) => fid,
|
||||
// we return an empty list of results when the attribute has been
|
||||
// set as filterable but no document contains this field (yet).
|
||||
None => return Ok(Vec::new()),
|
||||
};
|
||||
|
||||
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? {
|
||||
Some(fst) => fst,
|
||||
None => return Ok(Vec::new()),
|
||||
};
|
||||
|
||||
let search_candidates = self
|
||||
.search_query
|
||||
.execute_for_candidates(self.is_hybrid || self.search_query.vector.is_some())?;
|
||||
|
||||
let mut results = match index.sort_facet_values_by(rtxn)?.get(&self.facet) {
|
||||
OrderBy::Lexicographic => ValuesCollection::by_lexicographic(self.max_values),
|
||||
OrderBy::Count => ValuesCollection::by_count(self.max_values),
|
||||
};
|
||||
|
||||
match self.query.as_ref() {
|
||||
Some(query) => {
|
||||
let options = NormalizerOption { lossy: true, ..Default::default() };
|
||||
let query = query.normalize(&options);
|
||||
let query = query.as_ref();
|
||||
|
||||
let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
|
||||
let field_authorizes_typos =
|
||||
!self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);
|
||||
|
||||
if authorize_typos && field_authorizes_typos {
|
||||
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
|
||||
if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
|
||||
if fst.contains(query) {
|
||||
self.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
query,
|
||||
query,
|
||||
&search_candidates,
|
||||
&mut results,
|
||||
)?;
|
||||
}
|
||||
} else {
|
||||
let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
|
||||
let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;
|
||||
|
||||
let is_prefix = true;
|
||||
let automaton = if query.len() < one_typo as usize {
|
||||
build_dfa(query, 0, is_prefix)
|
||||
} else if query.len() < two_typos as usize {
|
||||
build_dfa(query, 1, is_prefix)
|
||||
} else {
|
||||
build_dfa(query, 2, is_prefix)
|
||||
};
|
||||
|
||||
let mut stream = fst.search(automaton).into_stream();
|
||||
while let Some(facet_value) = stream.next() {
|
||||
let value = std::str::from_utf8(facet_value)?;
|
||||
if self
|
||||
.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
value,
|
||||
query,
|
||||
&search_candidates,
|
||||
&mut results,
|
||||
)?
|
||||
.is_break()
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let automaton = Str::new(query).starts_with();
|
||||
let mut stream = fst.search(automaton).into_stream();
|
||||
while let Some(facet_value) = stream.next() {
|
||||
let value = std::str::from_utf8(facet_value)?;
|
||||
if self
|
||||
.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
value,
|
||||
query,
|
||||
&search_candidates,
|
||||
&mut results,
|
||||
)?
|
||||
.is_break()
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None => {
|
||||
let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
|
||||
for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
|
||||
let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
|
||||
result?;
|
||||
let count = search_candidates.intersection_len(&bitmap);
|
||||
if count != 0 {
|
||||
let value = self
|
||||
.one_original_value_of(fid, left_bound, bitmap.min().unwrap())?
|
||||
.unwrap_or_else(|| left_bound.to_string());
|
||||
if results.insert(FacetValueHit { value, count }).is_break() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results.into_sorted_vec())
|
||||
}
|
||||
|
||||
fn fetch_original_facets_using_normalized(
|
||||
&self,
|
||||
fid: FieldId,
|
||||
value: &str,
|
||||
query: &str,
|
||||
search_candidates: &RoaringBitmap,
|
||||
results: &mut ValuesCollection,
|
||||
) -> Result<ControlFlow<()>> {
|
||||
let index = self.search_query.index;
|
||||
let rtxn = self.search_query.rtxn;
|
||||
|
||||
let database = index.facet_id_normalized_string_strings;
|
||||
let key = (fid, value);
|
||||
let original_strings = match database.get(rtxn, &key)? {
|
||||
Some(original_strings) => original_strings,
|
||||
None => {
|
||||
error!("the facet value is missing from the facet database: {key:?}");
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
}
|
||||
};
|
||||
for original in original_strings {
|
||||
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() };
|
||||
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
||||
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
||||
None => {
|
||||
error!("the facet value is missing from the facet database: {key:?}");
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
}
|
||||
};
|
||||
let count = search_candidates.intersection_len(&docids);
|
||||
if count != 0 {
|
||||
let value = self
|
||||
.one_original_value_of(fid, &original, docids.min().unwrap())?
|
||||
.unwrap_or_else(|| query.to_string());
|
||||
if results.insert(FacetValueHit { value, count }).is_break() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ControlFlow::Continue(()))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
|
||||
pub struct FacetValueHit {
|
||||
/// The original facet value
|
||||
pub value: String,
|
||||
/// The number of documents associated to this facet
|
||||
pub count: u64,
|
||||
}
|
||||
|
||||
impl PartialOrd for FacetValueHit {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for FacetValueHit {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
self.count.cmp(&other.count).then_with(|| self.value.cmp(&other.value))
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for FacetValueHit {}
|
||||
|
||||
/// A wrapper type that collects the best facet values by
|
||||
/// lexicographic or number of associated values.
|
||||
enum ValuesCollection {
|
||||
/// Keeps the top values according to the lexicographic order.
|
||||
Lexicographic { max: usize, content: Vec<FacetValueHit> },
|
||||
/// Keeps the top values according to the number of values associated to them.
|
||||
///
|
||||
/// Note that it is a max heap and we need to move the smallest counts
|
||||
/// at the top to be able to pop them when we reach the max_values limit.
|
||||
Count { max: usize, content: BinaryHeap<Reverse<FacetValueHit>> },
|
||||
}
|
||||
|
||||
impl ValuesCollection {
|
||||
pub fn by_lexicographic(max: usize) -> Self {
|
||||
ValuesCollection::Lexicographic { max, content: Vec::new() }
|
||||
}
|
||||
|
||||
pub fn by_count(max: usize) -> Self {
|
||||
ValuesCollection::Count { max, content: BinaryHeap::new() }
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, value: FacetValueHit) -> ControlFlow<()> {
|
||||
match self {
|
||||
ValuesCollection::Lexicographic { max, content } => {
|
||||
if content.len() < *max {
|
||||
content.push(value);
|
||||
if content.len() < *max {
|
||||
return ControlFlow::Continue(());
|
||||
}
|
||||
}
|
||||
ControlFlow::Break(())
|
||||
}
|
||||
ValuesCollection::Count { max, content } => {
|
||||
if content.len() == *max {
|
||||
// Peeking gives us the worst value in the list as
|
||||
// this is a max-heap and we reversed it.
|
||||
let Some(mut peek) = content.peek_mut() else { return ControlFlow::Break(()) };
|
||||
if peek.0.count <= value.count {
|
||||
// Replace the current worst value in the heap
|
||||
// with the new one we received that is better.
|
||||
*peek = Reverse(value);
|
||||
}
|
||||
} else {
|
||||
content.push(Reverse(value));
|
||||
}
|
||||
ControlFlow::Continue(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the list of facet values in descending order of, either,
|
||||
/// count or lexicographic order of the value depending on the type.
|
||||
pub fn into_sorted_vec(self) -> Vec<FacetValueHit> {
|
||||
match self {
|
||||
ValuesCollection::Lexicographic { content, .. } => content.into_iter().collect(),
|
||||
ValuesCollection::Count { content, .. } => {
|
||||
// Convert the heap into a vec of hits by removing the Reverse wrapper.
|
||||
// Hits are already in the right order as they were reversed and there
|
||||
// are output in ascending order.
|
||||
content.into_sorted_vec().into_iter().map(|Reverse(hit)| hit).collect()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,25 +1,17 @@
|
||||
use std::fmt;
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use charabia::normalizer::NormalizerOption;
|
||||
use charabia::Normalize;
|
||||
use fst::automaton::{Automaton, Str};
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
|
||||
use once_cell::sync::Lazy;
|
||||
use roaring::bitmap::RoaringBitmap;
|
||||
use tracing::error;
|
||||
|
||||
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
|
||||
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
|
||||
use self::new::{execute_vector_search, PartialSearchResult};
|
||||
use crate::error::UserError;
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::vector::DistributionShift;
|
||||
use crate::{
|
||||
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index,
|
||||
Result, SearchContext,
|
||||
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result,
|
||||
SearchContext,
|
||||
};
|
||||
|
||||
// Building these factories is not free.
|
||||
@ -27,9 +19,6 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
|
||||
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
|
||||
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
|
||||
|
||||
/// The maximum number of values per facet returned by the facet search route.
|
||||
const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100;
|
||||
|
||||
pub mod facet;
|
||||
mod fst_utils;
|
||||
pub mod hybrid;
|
||||
@ -302,240 +291,6 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SearchForFacetValues<'a> {
|
||||
query: Option<String>,
|
||||
facet: String,
|
||||
search_query: Search<'a>,
|
||||
max_values: usize,
|
||||
is_hybrid: bool,
|
||||
}
|
||||
|
||||
impl<'a> SearchForFacetValues<'a> {
|
||||
pub fn new(
|
||||
facet: String,
|
||||
search_query: Search<'a>,
|
||||
is_hybrid: bool,
|
||||
) -> SearchForFacetValues<'a> {
|
||||
SearchForFacetValues {
|
||||
query: None,
|
||||
facet,
|
||||
search_query,
|
||||
max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET,
|
||||
is_hybrid,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
|
||||
self.query = Some(query.into());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn max_values(&mut self, max: usize) -> &mut Self {
|
||||
self.max_values = max;
|
||||
self
|
||||
}
|
||||
|
||||
fn one_original_value_of(
|
||||
&self,
|
||||
field_id: FieldId,
|
||||
facet_str: &str,
|
||||
any_docid: DocumentId,
|
||||
) -> Result<Option<String>> {
|
||||
let index = self.search_query.index;
|
||||
let rtxn = self.search_query.rtxn;
|
||||
let key: (FieldId, _, &str) = (field_id, any_docid, facet_str);
|
||||
Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned()))
|
||||
}
|
||||
|
||||
pub fn execute(&self) -> Result<Vec<FacetValueHit>> {
|
||||
let index = self.search_query.index;
|
||||
let rtxn = self.search_query.rtxn;
|
||||
|
||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||
if !filterable_fields.contains(&self.facet) {
|
||||
let (valid_fields, hidden_fields) =
|
||||
index.remove_hidden_fields(rtxn, filterable_fields)?;
|
||||
|
||||
return Err(UserError::InvalidFacetSearchFacetName {
|
||||
field: self.facet.clone(),
|
||||
valid_fields,
|
||||
hidden_fields,
|
||||
}
|
||||
.into());
|
||||
}
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let fid = match fields_ids_map.id(&self.facet) {
|
||||
Some(fid) => fid,
|
||||
// we return an empty list of results when the attribute has been
|
||||
// set as filterable but no document contains this field (yet).
|
||||
None => return Ok(Vec::new()),
|
||||
};
|
||||
|
||||
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? {
|
||||
Some(fst) => fst,
|
||||
None => return Ok(vec![]),
|
||||
};
|
||||
|
||||
let search_candidates = self
|
||||
.search_query
|
||||
.execute_for_candidates(self.is_hybrid || self.search_query.vector.is_some())?;
|
||||
|
||||
match self.query.as_ref() {
|
||||
Some(query) => {
|
||||
let options = NormalizerOption { lossy: true, ..Default::default() };
|
||||
let query = query.normalize(&options);
|
||||
let query = query.as_ref();
|
||||
|
||||
let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
|
||||
let field_authorizes_typos =
|
||||
!self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);
|
||||
|
||||
if authorize_typos && field_authorizes_typos {
|
||||
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
|
||||
if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
|
||||
let mut results = vec![];
|
||||
if fst.contains(query) {
|
||||
self.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
query,
|
||||
query,
|
||||
&search_candidates,
|
||||
&mut results,
|
||||
)?;
|
||||
}
|
||||
Ok(results)
|
||||
} else {
|
||||
let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
|
||||
let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;
|
||||
|
||||
let is_prefix = true;
|
||||
let automaton = if query.len() < one_typo as usize {
|
||||
build_dfa(query, 0, is_prefix)
|
||||
} else if query.len() < two_typos as usize {
|
||||
build_dfa(query, 1, is_prefix)
|
||||
} else {
|
||||
build_dfa(query, 2, is_prefix)
|
||||
};
|
||||
|
||||
let mut stream = fst.search(automaton).into_stream();
|
||||
let mut results = vec![];
|
||||
while let Some(facet_value) = stream.next() {
|
||||
let value = std::str::from_utf8(facet_value)?;
|
||||
if self
|
||||
.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
value,
|
||||
query,
|
||||
&search_candidates,
|
||||
&mut results,
|
||||
)?
|
||||
.is_break()
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
} else {
|
||||
let automaton = Str::new(query).starts_with();
|
||||
let mut stream = fst.search(automaton).into_stream();
|
||||
let mut results = vec![];
|
||||
while let Some(facet_value) = stream.next() {
|
||||
let value = std::str::from_utf8(facet_value)?;
|
||||
if self
|
||||
.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
value,
|
||||
query,
|
||||
&search_candidates,
|
||||
&mut results,
|
||||
)?
|
||||
.is_break()
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
}
|
||||
None => {
|
||||
let mut results = vec![];
|
||||
let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
|
||||
for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
|
||||
let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
|
||||
result?;
|
||||
let count = search_candidates.intersection_len(&bitmap);
|
||||
if count != 0 {
|
||||
let value = self
|
||||
.one_original_value_of(fid, left_bound, bitmap.min().unwrap())?
|
||||
.unwrap_or_else(|| left_bound.to_string());
|
||||
results.push(FacetValueHit { value, count });
|
||||
}
|
||||
if results.len() >= self.max_values {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(results)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn fetch_original_facets_using_normalized(
|
||||
&self,
|
||||
fid: FieldId,
|
||||
value: &str,
|
||||
query: &str,
|
||||
search_candidates: &RoaringBitmap,
|
||||
results: &mut Vec<FacetValueHit>,
|
||||
) -> Result<ControlFlow<()>> {
|
||||
let index = self.search_query.index;
|
||||
let rtxn = self.search_query.rtxn;
|
||||
|
||||
let database = index.facet_id_normalized_string_strings;
|
||||
let key = (fid, value);
|
||||
let original_strings = match database.get(rtxn, &key)? {
|
||||
Some(original_strings) => original_strings,
|
||||
None => {
|
||||
error!("the facet value is missing from the facet database: {key:?}");
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
}
|
||||
};
|
||||
for original in original_strings {
|
||||
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() };
|
||||
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
||||
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
||||
None => {
|
||||
error!("the facet value is missing from the facet database: {key:?}");
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
}
|
||||
};
|
||||
let count = search_candidates.intersection_len(&docids);
|
||||
if count != 0 {
|
||||
let value = self
|
||||
.one_original_value_of(fid, &original, docids.min().unwrap())?
|
||||
.unwrap_or_else(|| query.to_string());
|
||||
results.push(FacetValueHit { value, count });
|
||||
}
|
||||
if results.len() >= self.max_values {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ControlFlow::Continue(()))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
|
||||
pub struct FacetValueHit {
|
||||
/// The original facet value
|
||||
pub value: String,
|
||||
/// The number of documents associated to this facet
|
||||
pub count: u64,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
#[allow(unused_imports)]
|
||||
|
@ -5,7 +5,7 @@ The typo ranking rule should transform the query graph such that it only contain
|
||||
the combinations of word derivations that it used to compute its bucket.
|
||||
|
||||
The proximity ranking rule should then look for proximities only between those specific derivations.
|
||||
For example, given the the search query `beautiful summer` and the dataset:
|
||||
For example, given the search query `beautiful summer` and the dataset:
|
||||
```text
|
||||
{ "id": 0, "text": "beautigul summer...... beautiful day in the summer" }
|
||||
{ "id": 1, "text": "beautiful summer" }
|
||||
|
@ -14,12 +14,13 @@ use super::IndexerConfig;
|
||||
use crate::criterion::Criterion;
|
||||
use crate::error::UserError;
|
||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||
use crate::order_by_map::OrderByMap;
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::update::index_documents::IndexDocumentsMethod;
|
||||
use crate::update::{IndexDocuments, UpdateIndexingStep};
|
||||
use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings};
|
||||
use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
|
||||
use crate::{FieldsIdsMap, Index, OrderBy, Result};
|
||||
use crate::{FieldsIdsMap, Index, Result};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
|
||||
pub enum Setting<T> {
|
||||
@ -145,7 +146,7 @@ pub struct Settings<'a, 't, 'i> {
|
||||
/// Attributes on which typo tolerance is disabled.
|
||||
exact_attributes: Setting<HashSet<String>>,
|
||||
max_values_per_facet: Setting<usize>,
|
||||
sort_facet_values_by: Setting<HashMap<String, OrderBy>>,
|
||||
sort_facet_values_by: Setting<OrderByMap>,
|
||||
pagination_max_total_hits: Setting<usize>,
|
||||
proximity_precision: Setting<ProximityPrecision>,
|
||||
embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>,
|
||||
@ -340,7 +341,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
self.max_values_per_facet = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_sort_facet_values_by(&mut self, value: HashMap<String, OrderBy>) {
|
||||
pub fn set_sort_facet_values_by(&mut self, value: OrderByMap) {
|
||||
self.sort_facet_values_by = Setting::Set(value);
|
||||
}
|
||||
|
||||
@ -1186,6 +1187,13 @@ pub fn validate_embedding_settings(
|
||||
}
|
||||
}
|
||||
}
|
||||
EmbedderSource::Ollama => {
|
||||
// Dimensions get inferred, only model name is required
|
||||
check_unset(&dimensions, "dimensions", inferred_source, name)?;
|
||||
check_set(&model, "model", inferred_source, name)?;
|
||||
check_unset(&api_key, "apiKey", inferred_source, name)?;
|
||||
check_unset(&revision, "revision", inferred_source, name)?;
|
||||
}
|
||||
EmbedderSource::HuggingFace => {
|
||||
check_unset(&api_key, "apiKey", inferred_source, name)?;
|
||||
check_unset(&dimensions, "dimensions", inferred_source, name)?;
|
||||
|
@ -2,6 +2,7 @@ use std::path::PathBuf;
|
||||
|
||||
use hf_hub::api::sync::ApiError;
|
||||
|
||||
use super::ollama::OllamaError;
|
||||
use crate::error::FaultSource;
|
||||
use crate::vector::openai::OpenAiError;
|
||||
|
||||
@ -71,6 +72,17 @@ pub enum EmbedErrorKind {
|
||||
OpenAiRuntimeInit(std::io::Error),
|
||||
#[error("initializing web client for sending embedding requests failed: {0}")]
|
||||
InitWebClient(reqwest::Error),
|
||||
// Dedicated Ollama error kinds, might have to merge them into one cohesive error type for all backends.
|
||||
#[error("unexpected response from Ollama: {0}")]
|
||||
OllamaUnexpected(reqwest::Error),
|
||||
#[error("sent too many requests to Ollama: {0}")]
|
||||
OllamaTooManyRequests(OllamaError),
|
||||
#[error("received internal error from Ollama: {0}")]
|
||||
OllamaInternalServerError(OllamaError),
|
||||
#[error("model not found. Meilisearch will not automatically download models from the Ollama library, please pull the model manually: {0}")]
|
||||
OllamaModelNotFoundError(OllamaError),
|
||||
#[error("received unhandled HTTP status code {0} from Ollama")]
|
||||
OllamaUnhandledStatusCode(u16),
|
||||
}
|
||||
|
||||
impl EmbedError {
|
||||
@ -129,6 +141,26 @@ impl EmbedError {
|
||||
pub fn openai_initialize_web_client(inner: reqwest::Error) -> Self {
|
||||
Self { kind: EmbedErrorKind::InitWebClient(inner), fault: FaultSource::Runtime }
|
||||
}
|
||||
|
||||
pub(crate) fn ollama_unexpected(inner: reqwest::Error) -> EmbedError {
|
||||
Self { kind: EmbedErrorKind::OllamaUnexpected(inner), fault: FaultSource::Bug }
|
||||
}
|
||||
|
||||
pub(crate) fn ollama_model_not_found(inner: OllamaError) -> EmbedError {
|
||||
Self { kind: EmbedErrorKind::OllamaModelNotFoundError(inner), fault: FaultSource::User }
|
||||
}
|
||||
|
||||
pub(crate) fn ollama_too_many_requests(inner: OllamaError) -> EmbedError {
|
||||
Self { kind: EmbedErrorKind::OllamaTooManyRequests(inner), fault: FaultSource::Runtime }
|
||||
}
|
||||
|
||||
pub(crate) fn ollama_internal_server_error(inner: OllamaError) -> EmbedError {
|
||||
Self { kind: EmbedErrorKind::OllamaInternalServerError(inner), fault: FaultSource::Runtime }
|
||||
}
|
||||
|
||||
pub(crate) fn ollama_unhandled_status_code(code: u16) -> EmbedError {
|
||||
Self { kind: EmbedErrorKind::OllamaUnhandledStatusCode(code), fault: FaultSource::Bug }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
@ -195,6 +227,13 @@ impl NewEmbedderError {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ollama_could_not_determine_dimension(inner: EmbedError) -> NewEmbedderError {
|
||||
Self {
|
||||
kind: NewEmbedderErrorKind::CouldNotDetermineDimension(inner),
|
||||
fault: FaultSource::User,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn openai_invalid_api_key_format(inner: reqwest::header::InvalidHeaderValue) -> Self {
|
||||
Self { kind: NewEmbedderErrorKind::InvalidApiKeyFormat(inner), fault: FaultSource::User }
|
||||
}
|
||||
|
@ -10,6 +10,8 @@ pub mod manual;
|
||||
pub mod openai;
|
||||
pub mod settings;
|
||||
|
||||
pub mod ollama;
|
||||
|
||||
pub use self::error::Error;
|
||||
|
||||
pub type Embedding = Vec<f32>;
|
||||
@ -76,6 +78,7 @@ pub enum Embedder {
|
||||
HuggingFace(hf::Embedder),
|
||||
OpenAi(openai::Embedder),
|
||||
UserProvided(manual::Embedder),
|
||||
Ollama(ollama::Embedder),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)]
|
||||
@ -127,6 +130,7 @@ impl IntoIterator for EmbeddingConfigs {
|
||||
pub enum EmbedderOptions {
|
||||
HuggingFace(hf::EmbedderOptions),
|
||||
OpenAi(openai::EmbedderOptions),
|
||||
Ollama(ollama::EmbedderOptions),
|
||||
UserProvided(manual::EmbedderOptions),
|
||||
}
|
||||
|
||||
@ -144,6 +148,10 @@ impl EmbedderOptions {
|
||||
pub fn openai(api_key: Option<String>) -> Self {
|
||||
Self::OpenAi(openai::EmbedderOptions::with_default_model(api_key))
|
||||
}
|
||||
|
||||
pub fn ollama() -> Self {
|
||||
Self::Ollama(ollama::EmbedderOptions::with_default_model())
|
||||
}
|
||||
}
|
||||
|
||||
impl Embedder {
|
||||
@ -151,6 +159,7 @@ impl Embedder {
|
||||
Ok(match options {
|
||||
EmbedderOptions::HuggingFace(options) => Self::HuggingFace(hf::Embedder::new(options)?),
|
||||
EmbedderOptions::OpenAi(options) => Self::OpenAi(openai::Embedder::new(options)?),
|
||||
EmbedderOptions::Ollama(options) => Self::Ollama(ollama::Embedder::new(options)?),
|
||||
EmbedderOptions::UserProvided(options) => {
|
||||
Self::UserProvided(manual::Embedder::new(options))
|
||||
}
|
||||
@ -167,6 +176,10 @@ impl Embedder {
|
||||
let client = embedder.new_client()?;
|
||||
embedder.embed(texts, &client).await
|
||||
}
|
||||
Embedder::Ollama(embedder) => {
|
||||
let client = embedder.new_client()?;
|
||||
embedder.embed(texts, &client).await
|
||||
}
|
||||
Embedder::UserProvided(embedder) => embedder.embed(texts),
|
||||
}
|
||||
}
|
||||
@ -181,6 +194,7 @@ impl Embedder {
|
||||
match self {
|
||||
Embedder::HuggingFace(embedder) => embedder.embed_chunks(text_chunks),
|
||||
Embedder::OpenAi(embedder) => embedder.embed_chunks(text_chunks),
|
||||
Embedder::Ollama(embedder) => embedder.embed_chunks(text_chunks),
|
||||
Embedder::UserProvided(embedder) => embedder.embed_chunks(text_chunks),
|
||||
}
|
||||
}
|
||||
@ -189,6 +203,7 @@ impl Embedder {
|
||||
match self {
|
||||
Embedder::HuggingFace(embedder) => embedder.chunk_count_hint(),
|
||||
Embedder::OpenAi(embedder) => embedder.chunk_count_hint(),
|
||||
Embedder::Ollama(embedder) => embedder.chunk_count_hint(),
|
||||
Embedder::UserProvided(_) => 1,
|
||||
}
|
||||
}
|
||||
@ -197,6 +212,7 @@ impl Embedder {
|
||||
match self {
|
||||
Embedder::HuggingFace(embedder) => embedder.prompt_count_in_chunk_hint(),
|
||||
Embedder::OpenAi(embedder) => embedder.prompt_count_in_chunk_hint(),
|
||||
Embedder::Ollama(embedder) => embedder.prompt_count_in_chunk_hint(),
|
||||
Embedder::UserProvided(_) => 1,
|
||||
}
|
||||
}
|
||||
@ -205,6 +221,7 @@ impl Embedder {
|
||||
match self {
|
||||
Embedder::HuggingFace(embedder) => embedder.dimensions(),
|
||||
Embedder::OpenAi(embedder) => embedder.dimensions(),
|
||||
Embedder::Ollama(embedder) => embedder.dimensions(),
|
||||
Embedder::UserProvided(embedder) => embedder.dimensions(),
|
||||
}
|
||||
}
|
||||
@ -213,6 +230,7 @@ impl Embedder {
|
||||
match self {
|
||||
Embedder::HuggingFace(embedder) => embedder.distribution(),
|
||||
Embedder::OpenAi(embedder) => embedder.distribution(),
|
||||
Embedder::Ollama(embedder) => embedder.distribution(),
|
||||
Embedder::UserProvided(_embedder) => None,
|
||||
}
|
||||
}
|
||||
|
307
milli/src/vector/ollama.rs
Normal file
307
milli/src/vector/ollama.rs
Normal file
@ -0,0 +1,307 @@
|
||||
// Copied from "openai.rs" with the sections I actually understand changed for Ollama.
|
||||
// The common components of the Ollama and OpenAI interfaces might need to be extracted.
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use reqwest::StatusCode;
|
||||
|
||||
use super::error::{EmbedError, NewEmbedderError};
|
||||
use super::openai::Retry;
|
||||
use super::{DistributionShift, Embedding, Embeddings};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Embedder {
|
||||
headers: reqwest::header::HeaderMap,
|
||||
options: EmbedderOptions,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
|
||||
pub struct EmbedderOptions {
|
||||
pub embedding_model: EmbeddingModel,
|
||||
}
|
||||
|
||||
#[derive(
|
||||
Debug, Clone, Hash, PartialEq, Eq, serde::Serialize, serde::Deserialize, deserr::Deserr,
|
||||
)]
|
||||
#[deserr(deny_unknown_fields)]
|
||||
pub struct EmbeddingModel {
|
||||
name: String,
|
||||
dimensions: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, serde::Serialize)]
|
||||
struct OllamaRequest<'a> {
|
||||
model: &'a str,
|
||||
prompt: &'a str,
|
||||
}
|
||||
|
||||
#[derive(Debug, serde::Deserialize)]
|
||||
struct OllamaResponse {
|
||||
embedding: Embedding,
|
||||
}
|
||||
|
||||
#[derive(Debug, serde::Deserialize)]
|
||||
pub struct OllamaError {
|
||||
error: String,
|
||||
}
|
||||
|
||||
impl EmbeddingModel {
|
||||
pub fn max_token(&self) -> usize {
|
||||
// this might not be the same for all models
|
||||
8192
|
||||
}
|
||||
|
||||
pub fn default_dimensions(&self) -> usize {
|
||||
// Dimensions for nomic-embed-text
|
||||
768
|
||||
}
|
||||
|
||||
pub fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
|
||||
pub fn from_name(name: &str) -> Self {
|
||||
Self { name: name.to_string(), dimensions: 0 }
|
||||
}
|
||||
|
||||
pub fn supports_overriding_dimensions(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for EmbeddingModel {
|
||||
fn default() -> Self {
|
||||
Self { name: "nomic-embed-text".to_string(), dimensions: 0 }
|
||||
}
|
||||
}
|
||||
|
||||
impl EmbedderOptions {
|
||||
pub fn with_default_model() -> Self {
|
||||
Self { embedding_model: Default::default() }
|
||||
}
|
||||
|
||||
pub fn with_embedding_model(embedding_model: EmbeddingModel) -> Self {
|
||||
Self { embedding_model }
|
||||
}
|
||||
}
|
||||
|
||||
impl Embedder {
|
||||
pub fn new_client(&self) -> Result<reqwest::Client, EmbedError> {
|
||||
reqwest::ClientBuilder::new()
|
||||
.default_headers(self.headers.clone())
|
||||
.build()
|
||||
.map_err(EmbedError::openai_initialize_web_client)
|
||||
}
|
||||
|
||||
pub fn new(options: EmbedderOptions) -> Result<Self, NewEmbedderError> {
|
||||
let mut headers = reqwest::header::HeaderMap::new();
|
||||
headers.insert(
|
||||
reqwest::header::CONTENT_TYPE,
|
||||
reqwest::header::HeaderValue::from_static("application/json"),
|
||||
);
|
||||
|
||||
let mut embedder = Self { options, headers };
|
||||
|
||||
let rt = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_io()
|
||||
.enable_time()
|
||||
.build()
|
||||
.map_err(EmbedError::openai_runtime_init)
|
||||
.map_err(NewEmbedderError::ollama_could_not_determine_dimension)?;
|
||||
|
||||
// Get dimensions from Ollama
|
||||
let request =
|
||||
OllamaRequest { model: &embedder.options.embedding_model.name(), prompt: "test" };
|
||||
// TODO: Refactor into shared error type
|
||||
let client = embedder
|
||||
.new_client()
|
||||
.map_err(NewEmbedderError::ollama_could_not_determine_dimension)?;
|
||||
|
||||
rt.block_on(async move {
|
||||
let response = client
|
||||
.post(get_ollama_path())
|
||||
.json(&request)
|
||||
.send()
|
||||
.await
|
||||
.map_err(EmbedError::ollama_unexpected)
|
||||
.map_err(NewEmbedderError::ollama_could_not_determine_dimension)?;
|
||||
|
||||
// Process error in case model not found
|
||||
let response = Self::check_response(response).await.map_err(|_err| {
|
||||
let e = EmbedError::ollama_model_not_found(OllamaError {
|
||||
error: format!("model: {}", embedder.options.embedding_model.name()),
|
||||
});
|
||||
NewEmbedderError::ollama_could_not_determine_dimension(e)
|
||||
})?;
|
||||
|
||||
let response: OllamaResponse = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(EmbedError::ollama_unexpected)
|
||||
.map_err(NewEmbedderError::ollama_could_not_determine_dimension)?;
|
||||
|
||||
let embedding = Embeddings::from_single_embedding(response.embedding);
|
||||
|
||||
embedder.options.embedding_model.dimensions = embedding.dimension();
|
||||
|
||||
tracing::info!(
|
||||
"ollama model {} with dimensionality {} added",
|
||||
embedder.options.embedding_model.name(),
|
||||
embedding.dimension()
|
||||
);
|
||||
|
||||
Ok(embedder)
|
||||
})
|
||||
}
|
||||
|
||||
async fn check_response(response: reqwest::Response) -> Result<reqwest::Response, Retry> {
|
||||
if !response.status().is_success() {
|
||||
// Not the same number of possible error cases covered as with OpenAI.
|
||||
match response.status() {
|
||||
StatusCode::TOO_MANY_REQUESTS => {
|
||||
let error_response: OllamaError = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(EmbedError::ollama_unexpected)
|
||||
.map_err(Retry::retry_later)?;
|
||||
|
||||
return Err(Retry::rate_limited(EmbedError::ollama_too_many_requests(
|
||||
OllamaError { error: error_response.error },
|
||||
)));
|
||||
}
|
||||
StatusCode::SERVICE_UNAVAILABLE => {
|
||||
let error_response: OllamaError = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(EmbedError::ollama_unexpected)
|
||||
.map_err(Retry::retry_later)?;
|
||||
return Err(Retry::retry_later(EmbedError::ollama_internal_server_error(
|
||||
OllamaError { error: error_response.error },
|
||||
)));
|
||||
}
|
||||
StatusCode::NOT_FOUND => {
|
||||
let error_response: OllamaError = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(EmbedError::ollama_unexpected)
|
||||
.map_err(Retry::give_up)?;
|
||||
|
||||
return Err(Retry::give_up(EmbedError::ollama_model_not_found(OllamaError {
|
||||
error: error_response.error,
|
||||
})));
|
||||
}
|
||||
code => {
|
||||
return Err(Retry::give_up(EmbedError::ollama_unhandled_status_code(
|
||||
code.as_u16(),
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
pub async fn embed(
|
||||
&self,
|
||||
texts: Vec<String>,
|
||||
client: &reqwest::Client,
|
||||
) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
||||
// Ollama only embedds one document at a time.
|
||||
let mut results = Vec::with_capacity(texts.len());
|
||||
|
||||
// The retry loop is inside the texts loop, might have to switch that around
|
||||
for text in texts {
|
||||
// Retries copied from openai.rs
|
||||
for attempt in 0..7 {
|
||||
let retry_duration = match self.try_embed(&text, client).await {
|
||||
Ok(result) => {
|
||||
results.push(result);
|
||||
break;
|
||||
}
|
||||
Err(retry) => {
|
||||
tracing::warn!("Failed: {}", retry.error);
|
||||
retry.into_duration(attempt)
|
||||
}
|
||||
}?;
|
||||
tracing::warn!(
|
||||
"Attempt #{}, retrying after {}ms.",
|
||||
attempt,
|
||||
retry_duration.as_millis()
|
||||
);
|
||||
tokio::time::sleep(retry_duration).await;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
async fn try_embed(
|
||||
&self,
|
||||
text: &str,
|
||||
client: &reqwest::Client,
|
||||
) -> Result<Embeddings<f32>, Retry> {
|
||||
let request = OllamaRequest { model: &self.options.embedding_model.name(), prompt: text };
|
||||
let response = client
|
||||
.post(get_ollama_path())
|
||||
.json(&request)
|
||||
.send()
|
||||
.await
|
||||
.map_err(EmbedError::openai_network)
|
||||
.map_err(Retry::retry_later)?;
|
||||
|
||||
let response = Self::check_response(response).await?;
|
||||
|
||||
let response: OllamaResponse = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(EmbedError::openai_unexpected)
|
||||
.map_err(Retry::retry_later)?;
|
||||
|
||||
tracing::trace!("response: {:?}", response.embedding);
|
||||
|
||||
let embedding = Embeddings::from_single_embedding(response.embedding);
|
||||
Ok(embedding)
|
||||
}
|
||||
|
||||
pub fn embed_chunks(
|
||||
&self,
|
||||
text_chunks: Vec<Vec<String>>,
|
||||
) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
|
||||
let rt = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_io()
|
||||
.enable_time()
|
||||
.build()
|
||||
.map_err(EmbedError::openai_runtime_init)?;
|
||||
let client = self.new_client()?;
|
||||
rt.block_on(futures::future::try_join_all(
|
||||
text_chunks.into_iter().map(|prompts| self.embed(prompts, &client)),
|
||||
))
|
||||
}
|
||||
|
||||
// Defaults copied from openai.rs
|
||||
pub fn chunk_count_hint(&self) -> usize {
|
||||
10
|
||||
}
|
||||
|
||||
pub fn prompt_count_in_chunk_hint(&self) -> usize {
|
||||
10
|
||||
}
|
||||
|
||||
pub fn dimensions(&self) -> usize {
|
||||
self.options.embedding_model.dimensions
|
||||
}
|
||||
|
||||
pub fn distribution(&self) -> Option<DistributionShift> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for OllamaError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.error)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_ollama_path() -> String {
|
||||
// Important: Hostname not enough, has to be entire path to embeddings endpoint
|
||||
std::env::var("MEILI_OLLAMA_URL").unwrap_or("http://localhost:11434/api/embeddings".to_string())
|
||||
}
|
@ -419,12 +419,12 @@ impl Embedder {
|
||||
|
||||
// retrying in case of failure
|
||||
|
||||
struct Retry {
|
||||
error: EmbedError,
|
||||
pub struct Retry {
|
||||
pub error: EmbedError,
|
||||
strategy: RetryStrategy,
|
||||
}
|
||||
|
||||
enum RetryStrategy {
|
||||
pub enum RetryStrategy {
|
||||
GiveUp,
|
||||
Retry,
|
||||
RetryTokenized,
|
||||
@ -432,23 +432,23 @@ enum RetryStrategy {
|
||||
}
|
||||
|
||||
impl Retry {
|
||||
fn give_up(error: EmbedError) -> Self {
|
||||
pub fn give_up(error: EmbedError) -> Self {
|
||||
Self { error, strategy: RetryStrategy::GiveUp }
|
||||
}
|
||||
|
||||
fn retry_later(error: EmbedError) -> Self {
|
||||
pub fn retry_later(error: EmbedError) -> Self {
|
||||
Self { error, strategy: RetryStrategy::Retry }
|
||||
}
|
||||
|
||||
fn retry_tokenized(error: EmbedError) -> Self {
|
||||
pub fn retry_tokenized(error: EmbedError) -> Self {
|
||||
Self { error, strategy: RetryStrategy::RetryTokenized }
|
||||
}
|
||||
|
||||
fn rate_limited(error: EmbedError) -> Self {
|
||||
pub fn rate_limited(error: EmbedError) -> Self {
|
||||
Self { error, strategy: RetryStrategy::RetryAfterRateLimit }
|
||||
}
|
||||
|
||||
fn into_duration(self, attempt: u32) -> Result<tokio::time::Duration, EmbedError> {
|
||||
pub fn into_duration(self, attempt: u32) -> Result<tokio::time::Duration, EmbedError> {
|
||||
match self.strategy {
|
||||
RetryStrategy::GiveUp => Err(self.error),
|
||||
RetryStrategy::Retry => Ok(tokio::time::Duration::from_millis((10u64).pow(attempt))),
|
||||
@ -459,11 +459,11 @@ impl Retry {
|
||||
}
|
||||
}
|
||||
|
||||
fn must_tokenize(&self) -> bool {
|
||||
pub fn must_tokenize(&self) -> bool {
|
||||
matches!(self.strategy, RetryStrategy::RetryTokenized)
|
||||
}
|
||||
|
||||
fn into_error(self) -> EmbedError {
|
||||
pub fn into_error(self) -> EmbedError {
|
||||
self.error
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
use deserr::Deserr;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::openai;
|
||||
use super::{ollama, openai};
|
||||
use crate::prompt::PromptData;
|
||||
use crate::update::Setting;
|
||||
use crate::vector::EmbeddingConfig;
|
||||
@ -80,11 +80,15 @@ impl EmbeddingSettings {
|
||||
Self::SOURCE => {
|
||||
&[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::UserProvided]
|
||||
}
|
||||
Self::MODEL => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
|
||||
Self::MODEL => {
|
||||
&[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama]
|
||||
}
|
||||
Self::REVISION => &[EmbedderSource::HuggingFace],
|
||||
Self::API_KEY => &[EmbedderSource::OpenAi],
|
||||
Self::DIMENSIONS => &[EmbedderSource::OpenAi, EmbedderSource::UserProvided],
|
||||
Self::DOCUMENT_TEMPLATE => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
|
||||
Self::DOCUMENT_TEMPLATE => {
|
||||
&[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama]
|
||||
}
|
||||
_other => unreachable!("unknown field"),
|
||||
}
|
||||
}
|
||||
@ -101,6 +105,7 @@ impl EmbeddingSettings {
|
||||
EmbedderSource::HuggingFace => {
|
||||
&[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE]
|
||||
}
|
||||
EmbedderSource::Ollama => &[Self::SOURCE, Self::MODEL, Self::DOCUMENT_TEMPLATE],
|
||||
EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS],
|
||||
}
|
||||
}
|
||||
@ -134,6 +139,7 @@ pub enum EmbedderSource {
|
||||
#[default]
|
||||
OpenAi,
|
||||
HuggingFace,
|
||||
Ollama,
|
||||
UserProvided,
|
||||
}
|
||||
|
||||
@ -143,6 +149,7 @@ impl std::fmt::Display for EmbedderSource {
|
||||
EmbedderSource::OpenAi => "openAi",
|
||||
EmbedderSource::HuggingFace => "huggingFace",
|
||||
EmbedderSource::UserProvided => "userProvided",
|
||||
EmbedderSource::Ollama => "ollama",
|
||||
};
|
||||
f.write_str(s)
|
||||
}
|
||||
@ -195,6 +202,14 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||
dimensions: options.dimensions.map(Setting::Set).unwrap_or_default(),
|
||||
document_template: Setting::Set(prompt.template),
|
||||
},
|
||||
super::EmbedderOptions::Ollama(options) => Self {
|
||||
source: Setting::Set(EmbedderSource::Ollama),
|
||||
model: Setting::Set(options.embedding_model.name().to_owned()),
|
||||
revision: Setting::NotSet,
|
||||
api_key: Setting::NotSet,
|
||||
dimensions: Setting::NotSet,
|
||||
document_template: Setting::Set(prompt.template),
|
||||
},
|
||||
super::EmbedderOptions::UserProvided(options) => Self {
|
||||
source: Setting::Set(EmbedderSource::UserProvided),
|
||||
model: Setting::NotSet,
|
||||
@ -229,6 +244,14 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
|
||||
}
|
||||
this.embedder_options = super::EmbedderOptions::OpenAi(options);
|
||||
}
|
||||
EmbedderSource::Ollama => {
|
||||
let mut options: ollama::EmbedderOptions =
|
||||
super::ollama::EmbedderOptions::with_default_model();
|
||||
if let Some(model) = model.set() {
|
||||
options.embedding_model = super::ollama::EmbeddingModel::from_name(&model);
|
||||
}
|
||||
this.embedder_options = super::EmbedderOptions::Ollama(options);
|
||||
}
|
||||
EmbedderSource::HuggingFace => {
|
||||
let mut options = super::hf::EmbedderOptions::default();
|
||||
if let Some(model) = model.set() {
|
||||
|
94
workloads/settings-add-remove-filters.json
Normal file
94
workloads/settings-add-remove-filters.json
Normal file
@ -0,0 +1,94 @@
|
||||
{
|
||||
"name": "settings-add-remove-filters.json",
|
||||
"run_count": 2,
|
||||
"extra_cli_args": [
|
||||
"--max-indexing-threads=4"
|
||||
],
|
||||
"assets": {
|
||||
"150k-people.json": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/150k-people.json",
|
||||
"sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b"
|
||||
}
|
||||
},
|
||||
"commands": [
|
||||
{
|
||||
"route": "indexes/peoples/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"searchableAttributes": [
|
||||
"last_name",
|
||||
"first_name",
|
||||
"featured_job_organization_name",
|
||||
"facebook_url",
|
||||
"twitter_url",
|
||||
"linkedin_url"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"city",
|
||||
"region",
|
||||
"country_code"
|
||||
],
|
||||
"dictionary": [
|
||||
"https://",
|
||||
"http://",
|
||||
"www.",
|
||||
"crunchbase.com",
|
||||
"facebook.com",
|
||||
"twitter.com",
|
||||
"linkedin.com"
|
||||
],
|
||||
"stopWords": [
|
||||
"https://",
|
||||
"http://",
|
||||
"www.",
|
||||
"crunchbase.com",
|
||||
"facebook.com",
|
||||
"twitter.com",
|
||||
"linkedin.com"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "DontWait"
|
||||
},
|
||||
{
|
||||
"route": "indexes/peoples/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "150k-people.json"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/peoples/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"filterableAttributes": [
|
||||
"city",
|
||||
"region",
|
||||
"country_code",
|
||||
"featured_job_title",
|
||||
"featured_job_organization_name"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/peoples/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"filterableAttributes": [
|
||||
"city",
|
||||
"region",
|
||||
"country_code"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
]
|
||||
}
|
86
workloads/settings-proximity-precision.json
Normal file
86
workloads/settings-proximity-precision.json
Normal file
@ -0,0 +1,86 @@
|
||||
{
|
||||
"name": "settings-proximity-precision.json",
|
||||
"run_count": 2,
|
||||
"extra_cli_args": [
|
||||
"--max-indexing-threads=4"
|
||||
],
|
||||
"assets": {
|
||||
"150k-people.json": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/150k-people.json",
|
||||
"sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b"
|
||||
}
|
||||
},
|
||||
"commands": [
|
||||
{
|
||||
"route": "indexes/peoples/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"searchableAttributes": [
|
||||
"last_name",
|
||||
"first_name",
|
||||
"featured_job_organization_name",
|
||||
"facebook_url",
|
||||
"twitter_url",
|
||||
"linkedin_url"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"city",
|
||||
"region",
|
||||
"country_code",
|
||||
"featured_job_title",
|
||||
"featured_job_organization_name"
|
||||
],
|
||||
"dictionary": [
|
||||
"https://",
|
||||
"http://",
|
||||
"www.",
|
||||
"crunchbase.com",
|
||||
"facebook.com",
|
||||
"twitter.com",
|
||||
"linkedin.com"
|
||||
],
|
||||
"stopWords": [
|
||||
"https://",
|
||||
"http://",
|
||||
"www.",
|
||||
"crunchbase.com",
|
||||
"facebook.com",
|
||||
"twitter.com",
|
||||
"linkedin.com"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "DontWait"
|
||||
},
|
||||
{
|
||||
"route": "indexes/peoples/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "150k-people.json"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/peoples/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"proximityPrecision": "byAttribute"
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/peoples/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"proximityPrecision": "byWord"
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
]
|
||||
}
|
114
workloads/settings-remove-add-swap-searchable.json
Normal file
114
workloads/settings-remove-add-swap-searchable.json
Normal file
@ -0,0 +1,114 @@
|
||||
{
|
||||
"name": "settings-remove-add-swap-searchable.json",
|
||||
"run_count": 2,
|
||||
"extra_cli_args": [
|
||||
"--max-indexing-threads=4"
|
||||
],
|
||||
"assets": {
|
||||
"150k-people.json": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/150k-people.json",
|
||||
"sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b"
|
||||
}
|
||||
},
|
||||
"commands": [
|
||||
{
|
||||
"route": "indexes/peoples/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"searchableAttributes": [
|
||||
"last_name",
|
||||
"first_name",
|
||||
"featured_job_organization_name",
|
||||
"facebook_url",
|
||||
"twitter_url",
|
||||
"linkedin_url"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"city",
|
||||
"region",
|
||||
"country_code",
|
||||
"featured_job_title",
|
||||
"featured_job_organization_name"
|
||||
],
|
||||
"dictionary": [
|
||||
"https://",
|
||||
"http://",
|
||||
"www.",
|
||||
"crunchbase.com",
|
||||
"facebook.com",
|
||||
"twitter.com",
|
||||
"linkedin.com"
|
||||
],
|
||||
"stopWords": [
|
||||
"https://",
|
||||
"http://",
|
||||
"www.",
|
||||
"crunchbase.com",
|
||||
"facebook.com",
|
||||
"twitter.com",
|
||||
"linkedin.com"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "DontWait"
|
||||
},
|
||||
{
|
||||
"route": "indexes/peoples/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "150k-people.json"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/peoples/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"searchableAttributes": [
|
||||
"last_name",
|
||||
"first_name",
|
||||
"featured_job_organization_name"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/peoples/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"searchableAttributes": [
|
||||
"last_name",
|
||||
"first_name",
|
||||
"featured_job_organization_name",
|
||||
"facebook_url",
|
||||
"twitter_url",
|
||||
"linkedin_url"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/peoples/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"searchableAttributes": [
|
||||
"first_name",
|
||||
"last_name",
|
||||
"featured_job_organization_name",
|
||||
"facebook_url",
|
||||
"twitter_url",
|
||||
"linkedin_url"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
]
|
||||
}
|
115
workloads/settings-typo.json
Normal file
115
workloads/settings-typo.json
Normal file
@ -0,0 +1,115 @@
|
||||
{
|
||||
"name": "settings-typo.json",
|
||||
"run_count": 2,
|
||||
"extra_cli_args": [
|
||||
"--max-indexing-threads=4"
|
||||
],
|
||||
"assets": {
|
||||
"150k-people.json": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/150k-people.json",
|
||||
"sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b"
|
||||
}
|
||||
},
|
||||
"commands": [
|
||||
{
|
||||
"route": "indexes/peoples/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"searchableAttributes": [
|
||||
"last_name",
|
||||
"first_name",
|
||||
"featured_job_title",
|
||||
"featured_job_organization_name",
|
||||
"facebook_url",
|
||||
"twitter_url",
|
||||
"linkedin_url"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"city",
|
||||
"region",
|
||||
"country_code",
|
||||
"featured_job_title",
|
||||
"featured_job_organization_name"
|
||||
],
|
||||
"dictionary": [
|
||||
"https://",
|
||||
"http://",
|
||||
"www.",
|
||||
"crunchbase.com",
|
||||
"facebook.com",
|
||||
"twitter.com",
|
||||
"linkedin.com"
|
||||
],
|
||||
"stopWords": [
|
||||
"https://",
|
||||
"http://",
|
||||
"www.",
|
||||
"crunchbase.com",
|
||||
"facebook.com",
|
||||
"twitter.com",
|
||||
"linkedin.com"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "DontWait"
|
||||
},
|
||||
{
|
||||
"route": "indexes/peoples/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "150k-people.json"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/peoples/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"typoTolerance": {
|
||||
"disableOnAttributes": ["featured_job_organization_name"]
|
||||
}
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/peoples/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"typoTolerance": {
|
||||
"disableOnAttributes": []
|
||||
}
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/peoples/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"typoTolerance": {
|
||||
"disableOnWords": ["Ben","Elowitz","Kevin","Flaherty", "Ron", "Dustin", "Owen", "Chris", "Mark", "Matt", "Peter", "Van", "Head", "of"]
|
||||
}
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/peoples/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"typoTolerance": {
|
||||
"disableOnWords": []
|
||||
}
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
]
|
||||
}
|
@ -11,157 +11,179 @@ use super::client::Client;
|
||||
use super::env_info;
|
||||
use super::workload::Workload;
|
||||
|
||||
pub async fn cancel_on_ctrl_c(
|
||||
invocation_uuid: Uuid,
|
||||
dashboard_client: Client,
|
||||
abort_handle: AbortHandle,
|
||||
) {
|
||||
tracing::info!("press Ctrl-C to cancel the invocation");
|
||||
match ctrl_c().await {
|
||||
Ok(()) => {
|
||||
tracing::info!(%invocation_uuid, "received Ctrl-C, cancelling invocation");
|
||||
mark_as_failed(dashboard_client, invocation_uuid, None).await;
|
||||
abort_handle.abort();
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum DashboardClient {
|
||||
Client(Client),
|
||||
Dry,
|
||||
}
|
||||
|
||||
impl DashboardClient {
|
||||
pub fn new(dashboard_url: &str, api_key: Option<&str>) -> anyhow::Result<Self> {
|
||||
let dashboard_client = Client::new(
|
||||
Some(format!("{}/api/v1", dashboard_url)),
|
||||
api_key,
|
||||
Some(std::time::Duration::from_secs(60)),
|
||||
)?;
|
||||
|
||||
Ok(Self::Client(dashboard_client))
|
||||
}
|
||||
|
||||
pub fn new_dry() -> Self {
|
||||
Self::Dry
|
||||
}
|
||||
|
||||
pub async fn send_machine_info(&self, env: &env_info::Environment) -> anyhow::Result<()> {
|
||||
let Self::Client(dashboard_client) = self else { return Ok(()) };
|
||||
|
||||
let response = dashboard_client
|
||||
.put("machine")
|
||||
.json(&json!({"hostname": env.hostname}))
|
||||
.send()
|
||||
.await
|
||||
.context("sending machine information")?;
|
||||
if !response.status().is_success() {
|
||||
bail!(
|
||||
"could not send machine information: {} {}",
|
||||
response.status(),
|
||||
response.text().await.unwrap_or_else(|_| "unknown".into())
|
||||
);
|
||||
}
|
||||
Err(error) => tracing::warn!(
|
||||
error = &error as &dyn std::error::Error,
|
||||
"failed to listen to Ctrl-C signal, invocation won't be canceled on Ctrl-C"
|
||||
),
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn mark_as_failed(
|
||||
dashboard_client: Client,
|
||||
invocation_uuid: Uuid,
|
||||
failure_reason: Option<String>,
|
||||
) {
|
||||
let response = dashboard_client
|
||||
.post("cancel-invocation")
|
||||
.json(&json!({
|
||||
"invocation_uuid": invocation_uuid,
|
||||
"failure_reason": failure_reason,
|
||||
}))
|
||||
.send()
|
||||
.await;
|
||||
let response = match response {
|
||||
Ok(response) => response,
|
||||
Err(response_error) => {
|
||||
tracing::error!(error = &response_error as &dyn std::error::Error, %invocation_uuid, "could not mark invocation as failed");
|
||||
return;
|
||||
pub async fn create_invocation(
|
||||
&self,
|
||||
build_info: build_info::BuildInfo,
|
||||
commit_message: &str,
|
||||
env: env_info::Environment,
|
||||
max_workloads: usize,
|
||||
reason: Option<&str>,
|
||||
) -> anyhow::Result<Uuid> {
|
||||
let Self::Client(dashboard_client) = self else { return Ok(Uuid::now_v7()) };
|
||||
|
||||
let response = dashboard_client
|
||||
.put("invocation")
|
||||
.json(&json!({
|
||||
"commit": {
|
||||
"sha1": build_info.commit_sha1,
|
||||
"message": commit_message,
|
||||
"commit_date": build_info.commit_timestamp,
|
||||
"branch": build_info.branch,
|
||||
"tag": build_info.describe.and_then(|describe| describe.as_tag()),
|
||||
},
|
||||
"machine_hostname": env.hostname,
|
||||
"max_workloads": max_workloads,
|
||||
"reason": reason
|
||||
}))
|
||||
.send()
|
||||
.await
|
||||
.context("sending invocation")?;
|
||||
if !response.status().is_success() {
|
||||
bail!(
|
||||
"could not send new invocation: {}",
|
||||
response.text().await.unwrap_or_else(|_| "unknown".into())
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
if !response.status().is_success() {
|
||||
tracing::error!(
|
||||
%invocation_uuid,
|
||||
"could not mark invocation as failed: {}",
|
||||
response.text().await.unwrap()
|
||||
);
|
||||
return;
|
||||
}
|
||||
tracing::warn!(%invocation_uuid, "marked invocation as failed or canceled");
|
||||
}
|
||||
|
||||
pub async fn send_machine_info(
|
||||
dashboard_client: &Client,
|
||||
env: &env_info::Environment,
|
||||
) -> anyhow::Result<()> {
|
||||
let response = dashboard_client
|
||||
.put("machine")
|
||||
.json(&json!({"hostname": env.hostname}))
|
||||
.send()
|
||||
.await
|
||||
.context("sending machine information")?;
|
||||
if !response.status().is_success() {
|
||||
bail!(
|
||||
"could not send machine information: {} {}",
|
||||
response.status(),
|
||||
response.text().await.unwrap_or_else(|_| "unknown".into())
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn create_invocation(
|
||||
dashboard_client: &Client,
|
||||
build_info: build_info::BuildInfo,
|
||||
commit_message: &str,
|
||||
env: env_info::Environment,
|
||||
max_workloads: usize,
|
||||
reason: Option<&str>,
|
||||
) -> anyhow::Result<Uuid> {
|
||||
let response = dashboard_client
|
||||
.put("invocation")
|
||||
.json(&json!({
|
||||
"commit": {
|
||||
"sha1": build_info.commit_sha1,
|
||||
"message": commit_message,
|
||||
"commit_date": build_info.commit_timestamp,
|
||||
"branch": build_info.branch,
|
||||
"tag": build_info.describe.and_then(|describe| describe.as_tag()),
|
||||
},
|
||||
"machine_hostname": env.hostname,
|
||||
"max_workloads": max_workloads,
|
||||
"reason": reason
|
||||
}))
|
||||
.send()
|
||||
.await
|
||||
.context("sending invocation")?;
|
||||
if !response.status().is_success() {
|
||||
bail!(
|
||||
"could not send new invocation: {}",
|
||||
response.text().await.unwrap_or_else(|_| "unknown".into())
|
||||
);
|
||||
}
|
||||
let invocation_uuid: Uuid =
|
||||
response.json().await.context("could not deserialize invocation response as JSON")?;
|
||||
Ok(invocation_uuid)
|
||||
}
|
||||
|
||||
pub async fn create_workload(
|
||||
dashboard_client: &Client,
|
||||
invocation_uuid: Uuid,
|
||||
workload: &Workload,
|
||||
) -> anyhow::Result<Uuid> {
|
||||
let response = dashboard_client
|
||||
.put("workload")
|
||||
.json(&json!({
|
||||
"invocation_uuid": invocation_uuid,
|
||||
"name": &workload.name,
|
||||
"max_runs": workload.run_count,
|
||||
}))
|
||||
.send()
|
||||
.await
|
||||
.context("could not create new workload")?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
bail!("creating new workload failed: {}", response.text().await.unwrap())
|
||||
let invocation_uuid: Uuid =
|
||||
response.json().await.context("could not deserialize invocation response as JSON")?;
|
||||
Ok(invocation_uuid)
|
||||
}
|
||||
|
||||
let workload_uuid: Uuid =
|
||||
response.json().await.context("could not deserialize JSON as UUID")?;
|
||||
Ok(workload_uuid)
|
||||
}
|
||||
pub async fn create_workload(
|
||||
&self,
|
||||
invocation_uuid: Uuid,
|
||||
workload: &Workload,
|
||||
) -> anyhow::Result<Uuid> {
|
||||
let Self::Client(dashboard_client) = self else { return Ok(Uuid::now_v7()) };
|
||||
|
||||
pub async fn create_run(
|
||||
dashboard_client: Client,
|
||||
workload_uuid: Uuid,
|
||||
report: &BTreeMap<String, CallStats>,
|
||||
) -> anyhow::Result<()> {
|
||||
let response = dashboard_client
|
||||
.put("run")
|
||||
.json(&json!({
|
||||
"workload_uuid": workload_uuid,
|
||||
"data": report
|
||||
}))
|
||||
.send()
|
||||
.await
|
||||
.context("sending new run")?;
|
||||
if !response.status().is_success() {
|
||||
bail!(
|
||||
"sending new run failed: {}",
|
||||
response.text().await.unwrap_or_else(|_| "unknown".into())
|
||||
)
|
||||
let response = dashboard_client
|
||||
.put("workload")
|
||||
.json(&json!({
|
||||
"invocation_uuid": invocation_uuid,
|
||||
"name": &workload.name,
|
||||
"max_runs": workload.run_count,
|
||||
}))
|
||||
.send()
|
||||
.await
|
||||
.context("could not create new workload")?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
bail!("creating new workload failed: {}", response.text().await.unwrap())
|
||||
}
|
||||
|
||||
let workload_uuid: Uuid =
|
||||
response.json().await.context("could not deserialize JSON as UUID")?;
|
||||
Ok(workload_uuid)
|
||||
}
|
||||
|
||||
pub async fn create_run(
|
||||
&self,
|
||||
workload_uuid: Uuid,
|
||||
report: &BTreeMap<String, CallStats>,
|
||||
) -> anyhow::Result<()> {
|
||||
let Self::Client(dashboard_client) = self else { return Ok(()) };
|
||||
|
||||
let response = dashboard_client
|
||||
.put("run")
|
||||
.json(&json!({
|
||||
"workload_uuid": workload_uuid,
|
||||
"data": report
|
||||
}))
|
||||
.send()
|
||||
.await
|
||||
.context("sending new run")?;
|
||||
if !response.status().is_success() {
|
||||
bail!(
|
||||
"sending new run failed: {}",
|
||||
response.text().await.unwrap_or_else(|_| "unknown".into())
|
||||
)
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn cancel_on_ctrl_c(self, invocation_uuid: Uuid, abort_handle: AbortHandle) {
|
||||
tracing::info!("press Ctrl-C to cancel the invocation");
|
||||
match ctrl_c().await {
|
||||
Ok(()) => {
|
||||
tracing::info!(%invocation_uuid, "received Ctrl-C, cancelling invocation");
|
||||
self.mark_as_failed(invocation_uuid, None).await;
|
||||
abort_handle.abort();
|
||||
}
|
||||
Err(error) => tracing::warn!(
|
||||
error = &error as &dyn std::error::Error,
|
||||
"failed to listen to Ctrl-C signal, invocation won't be canceled on Ctrl-C"
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn mark_as_failed(&self, invocation_uuid: Uuid, failure_reason: Option<String>) {
|
||||
if let DashboardClient::Client(client) = self {
|
||||
let response = client
|
||||
.post("cancel-invocation")
|
||||
.json(&json!({
|
||||
"invocation_uuid": invocation_uuid,
|
||||
"failure_reason": failure_reason,
|
||||
}))
|
||||
.send()
|
||||
.await;
|
||||
let response = match response {
|
||||
Ok(response) => response,
|
||||
Err(response_error) => {
|
||||
tracing::error!(error = &response_error as &dyn std::error::Error, %invocation_uuid, "could not mark invocation as failed");
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
if !response.status().is_success() {
|
||||
tracing::error!(
|
||||
%invocation_uuid,
|
||||
"could not mark invocation as failed: {}",
|
||||
response.text().await.unwrap()
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
tracing::warn!(%invocation_uuid, "marked invocation as failed or canceled");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
@ -50,6 +50,10 @@ pub struct BenchDeriveArgs {
|
||||
#[arg(long, default_value_t = default_dashboard_url())]
|
||||
dashboard_url: String,
|
||||
|
||||
/// Don't actually send results to the dashboard
|
||||
#[arg(long)]
|
||||
no_dashboard: bool,
|
||||
|
||||
/// Directory to output reports.
|
||||
#[arg(long, default_value_t = default_report_folder())]
|
||||
report_folder: String,
|
||||
@ -103,11 +107,11 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
|
||||
let assets_client =
|
||||
Client::new(None, args.assets_key.as_deref(), Some(std::time::Duration::from_secs(3600)))?; // 1h
|
||||
|
||||
let dashboard_client = Client::new(
|
||||
Some(format!("{}/api/v1", args.dashboard_url)),
|
||||
args.api_key.as_deref(),
|
||||
Some(std::time::Duration::from_secs(60)),
|
||||
)?;
|
||||
let dashboard_client = if args.no_dashboard {
|
||||
dashboard::DashboardClient::new_dry()
|
||||
} else {
|
||||
dashboard::DashboardClient::new(&args.dashboard_url, args.api_key.as_deref())?
|
||||
};
|
||||
|
||||
// reporting uses its own client because keeping the stream open to wait for entries
|
||||
// blocks any other requests
|
||||
@ -127,12 +131,12 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
|
||||
// enter runtime
|
||||
|
||||
rt.block_on(async {
|
||||
dashboard::send_machine_info(&dashboard_client, &env).await?;
|
||||
dashboard_client.send_machine_info(&env).await?;
|
||||
|
||||
let commit_message = build_info.commit_msg.context("missing commit message")?.split('\n').next().unwrap();
|
||||
let max_workloads = args.workload_file.len();
|
||||
let reason: Option<&str> = args.reason.as_deref();
|
||||
let invocation_uuid = dashboard::create_invocation(&dashboard_client, build_info, commit_message, env, max_workloads, reason).await?;
|
||||
let invocation_uuid = dashboard_client.create_invocation( build_info, commit_message, env, max_workloads, reason).await?;
|
||||
|
||||
tracing::info!(workload_count = args.workload_file.len(), "handling workload files");
|
||||
|
||||
@ -167,7 +171,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
|
||||
let abort_handle = workload_runs.abort_handle();
|
||||
tokio::spawn({
|
||||
let dashboard_client = dashboard_client.clone();
|
||||
dashboard::cancel_on_ctrl_c(invocation_uuid, dashboard_client, abort_handle)
|
||||
dashboard_client.cancel_on_ctrl_c(invocation_uuid, abort_handle)
|
||||
});
|
||||
|
||||
// wait for the end of the main task, handle result
|
||||
@ -178,7 +182,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
|
||||
}
|
||||
Ok(Err(error)) => {
|
||||
tracing::error!(%invocation_uuid, error = %error, "invocation failed, attempting to report the failure to dashboard");
|
||||
dashboard::mark_as_failed(dashboard_client, invocation_uuid, Some(error.to_string())).await;
|
||||
dashboard_client.mark_as_failed(invocation_uuid, Some(error.to_string())).await;
|
||||
tracing::warn!(%invocation_uuid, "invocation marked as failed following error");
|
||||
Err(error)
|
||||
},
|
||||
@ -186,7 +190,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
|
||||
match join_error.try_into_panic() {
|
||||
Ok(panic) => {
|
||||
tracing::error!("invocation panicked, attempting to report the failure to dashboard");
|
||||
dashboard::mark_as_failed(dashboard_client, invocation_uuid, Some("Panicked".into())).await;
|
||||
dashboard_client.mark_as_failed( invocation_uuid, Some("Panicked".into())).await;
|
||||
std::panic::resume_unwind(panic)
|
||||
}
|
||||
Err(_) => {
|
||||
|
@ -12,8 +12,9 @@ use uuid::Uuid;
|
||||
use super::assets::Asset;
|
||||
use super::client::Client;
|
||||
use super::command::SyncMode;
|
||||
use super::dashboard::DashboardClient;
|
||||
use super::BenchDeriveArgs;
|
||||
use crate::bench::{assets, dashboard, meili_process};
|
||||
use crate::bench::{assets, meili_process};
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct Workload {
|
||||
@ -25,7 +26,7 @@ pub struct Workload {
|
||||
}
|
||||
|
||||
async fn run_commands(
|
||||
dashboard_client: &Client,
|
||||
dashboard_client: &DashboardClient,
|
||||
logs_client: &Client,
|
||||
meili_client: &Client,
|
||||
workload_uuid: Uuid,
|
||||
@ -64,7 +65,7 @@ async fn run_commands(
|
||||
#[tracing::instrument(skip(assets_client, dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = workload.name))]
|
||||
pub async fn execute(
|
||||
assets_client: &Client,
|
||||
dashboard_client: &Client,
|
||||
dashboard_client: &DashboardClient,
|
||||
logs_client: &Client,
|
||||
meili_client: &Client,
|
||||
invocation_uuid: Uuid,
|
||||
@ -74,8 +75,7 @@ pub async fn execute(
|
||||
) -> anyhow::Result<()> {
|
||||
assets::fetch_assets(assets_client, &workload.assets, &args.asset_folder).await?;
|
||||
|
||||
let workload_uuid =
|
||||
dashboard::create_workload(dashboard_client, invocation_uuid, &workload).await?;
|
||||
let workload_uuid = dashboard_client.create_workload(invocation_uuid, &workload).await?;
|
||||
|
||||
let mut tasks = Vec::new();
|
||||
|
||||
@ -113,7 +113,7 @@ pub async fn execute(
|
||||
#[allow(clippy::too_many_arguments)] // not best code quality, but this is a benchmark runner
|
||||
#[tracing::instrument(skip(dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = %workload.name))]
|
||||
async fn execute_run(
|
||||
dashboard_client: &Client,
|
||||
dashboard_client: &DashboardClient,
|
||||
logs_client: &Client,
|
||||
meili_client: &Client,
|
||||
workload_uuid: Uuid,
|
||||
@ -202,7 +202,7 @@ async fn start_report(
|
||||
}
|
||||
|
||||
async fn stop_report(
|
||||
dashboard_client: &Client,
|
||||
dashboard_client: &DashboardClient,
|
||||
logs_client: &Client,
|
||||
workload_uuid: Uuid,
|
||||
filename: String,
|
||||
@ -232,7 +232,7 @@ async fn stop_report(
|
||||
.context("could not convert trace to report")?;
|
||||
let context = || format!("writing report to {filename}");
|
||||
|
||||
dashboard::create_run(dashboard_client, workload_uuid, &report).await?;
|
||||
dashboard_client.create_run(workload_uuid, &report).await?;
|
||||
|
||||
let mut output_file = std::io::BufWriter::new(
|
||||
std::fs::File::options()
|
||||
|
Loading…
x
Reference in New Issue
Block a user