mirror of https://github.com/meilisearch/MeiliSearch
synced 2024-10-30 01:38:49 +01:00

Merge branch 'main' into indexer-edition-2024
This commit is contained in: commit 974272f2e9

Cargo.lock (generated): 53 changes
@@ -387,14 +387,14 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
 [[package]]
 name = "arroy"
 version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a"
+source = "git+https://github.com/meilisearch/arroy/?rev=2386594dfb009ce08821a925ccc89fb8e30bf73d#2386594dfb009ce08821a925ccc89fb8e30bf73d"
 dependencies = [
  "bytemuck",
  "byteorder",
  "heed",
  "log",
  "memmap2",
+ "nohash",
  "ordered-float",
  "rand",
  "rayon",
@@ -471,7 +471,7 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"

 [[package]]
 name = "benchmarks"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "anyhow",
  "bytes",
@@ -652,7 +652,7 @@ dependencies = [

 [[package]]
 name = "build-info"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "anyhow",
  "time",
@@ -1621,7 +1621,7 @@ dependencies = [

 [[package]]
 name = "dump"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "anyhow",
  "big_s",
@@ -1833,7 +1833,7 @@ checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"

 [[package]]
 name = "file-store"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "tempfile",
  "thiserror",
@@ -1855,7 +1855,7 @@ dependencies = [

 [[package]]
 name = "filter-parser"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "insta",
  "nom",
@@ -1875,7 +1875,7 @@ dependencies = [

 [[package]]
 name = "flatten-serde-json"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "criterion",
  "serde_json",
@@ -1999,7 +1999,7 @@ dependencies = [

 [[package]]
 name = "fuzzers"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "arbitrary",
  "clap",
@@ -2551,7 +2551,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"

 [[package]]
 name = "index-scheduler"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "anyhow",
  "arroy",
@@ -2745,7 +2745,7 @@ dependencies = [

 [[package]]
 name = "json-depth-checker"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "criterion",
  "serde_json",
@@ -3373,7 +3373,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

 [[package]]
 name = "meili-snap"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "insta",
  "md5",
@@ -3382,7 +3382,7 @@ dependencies = [

 [[package]]
 name = "meilisearch"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "actix-cors",
  "actix-http",
@@ -3471,7 +3471,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-auth"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "base64 0.22.1",
  "enum-iterator",
@@ -3490,7 +3490,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-types"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "actix-web",
  "anyhow",
@@ -3520,7 +3520,7 @@ dependencies = [

 [[package]]
 name = "meilitool"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "anyhow",
  "clap",
@@ -3528,6 +3528,7 @@ dependencies = [
  "file-store",
  "meilisearch-auth",
  "meilisearch-types",
+ "serde",
  "time",
  "uuid",
 ]
@@ -3550,7 +3551,7 @@ dependencies = [

 [[package]]
 name = "milli"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "arroy",
  "big_s",
@@ -3694,6 +3695,12 @@ version = "0.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e"

+[[package]]
+name = "nohash"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0f889fb66f7acdf83442c35775764b51fed3c606ab9cee51500dbde2cf528ca"
+
 [[package]]
 name = "nom"
 version = "7.1.3"
@@ -3984,7 +3991,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"

 [[package]]
 name = "permissive-json-pointer"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "big_s",
  "serde_json",
@@ -4841,9 +4848,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"

 [[package]]
 name = "serde"
-version = "1.0.204"
+version = "1.0.210"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12"
+checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a"
 dependencies = [
  "serde_derive",
 ]
@@ -4859,9 +4866,9 @@ dependencies = [

 [[package]]
 name = "serde_derive"
-version = "1.0.204"
+version = "1.0.210"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
+checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -6368,7 +6375,7 @@ dependencies = [

 [[package]]
 name = "xtask"
-version = "1.10.0"
+version = "1.11.0"
 dependencies = [
  "anyhow",
  "build-info",
@@ -22,7 +22,7 @@ members = [
 ]

 [workspace.package]
-version = "1.10.0"
+version = "1.11.0"
 authors = [
   "Quentin de Quelen <quentin@dequelen.me>",
   "Clément Renault <clement@meilisearch.com>",
@@ -45,14 +45,14 @@ See the list of all our example apps in our [demos repository](https://github.co
 ## ✨ Features
 - **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results
 - **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience
-- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
+- **[Typo tolerance](https://www.meilisearch.com/docs/learn/relevancy/typo_tolerance_settings?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
 - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
 - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
-- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
+- **[Synonym support](https://www.meilisearch.com/docs/learn/relevancy/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
 - **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
 - **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
 - **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
-- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
+- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
 - **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
 - **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
 - **Easy to install, deploy, and maintain**
@@ -255,6 +255,8 @@ pub(crate) mod test {
     }
     "###);

+    insta::assert_json_snapshot!(vector_index.settings().unwrap());
+
     {
         let documents: Result<Vec<_>> = vector_index.documents().unwrap().collect();
         let mut documents = documents.unwrap();
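A note on the assertion added above: insta snapshot macros record the serialized value in a `.snap` file on the first run and fail later runs if the value drifts. A minimal, self-contained sketch of that flow, assuming the `insta` crate (with its `json` feature) and `serde_json` as dev-dependencies; the test body is illustrative, not taken from the Meilisearch test suite:

    #[test]
    fn settings_snapshot_sketch() {
        // Stand-in for the dump's settings; any serde-serializable value works.
        let settings = serde_json::json!({
            "proximityPrecision": "byWord",
            "searchCutoffMs": null,
        });
        // The first run writes a `.snap` file next to the test; subsequent runs
        // diff the fresh value against it and fail on any change.
        insta::assert_json_snapshot!(settings);
    }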
@@ -1,783 +1,56 @@
 ---
 source: dump/src/reader/mod.rs
-expression: document
+expression: vector_index.settings().unwrap()
 ---
 {
-  "id": "e3",
-  "desc": "overriden vector + map",
-  "_vectors": {
-    "default": [
-      0.2,
-      (... the remaining 767 entries, all 0.1, elided ...)
-    ],
-    "toto": [
-      0.1
-    ]
-  }
+  "displayedAttributes": [
+    "*"
+  ],
+  "searchableAttributes": [
+    "*"
+  ],
+  "filterableAttributes": [],
+  "sortableAttributes": [],
+  "rankingRules": [
+    "words",
+    "typo",
+    "proximity",
+    "attribute",
+    "sort",
+    "exactness"
+  ],
+  "stopWords": [],
+  "nonSeparatorTokens": [],
+  "separatorTokens": [],
+  "dictionary": [],
+  "synonyms": {},
+  "distinctAttribute": null,
+  "proximityPrecision": "byWord",
+  "typoTolerance": {
+    "enabled": true,
+    "minWordSizeForTypos": {
+      "oneTypo": 5,
+      "twoTypos": 9
+    },
+    "disableOnWords": [],
+    "disableOnAttributes": []
+  },
+  "faceting": {
+    "maxValuesPerFacet": 100,
+    "sortFacetValuesBy": {
+      "*": "alpha"
+    }
+  },
+  "pagination": {
+    "maxTotalHits": 1000
+  },
+  "embedders": {
+    "default": {
+      "source": "huggingFace",
+      "model": "BAAI/bge-base-en-v1.5",
+      "revision": "617ca489d9e86b49b8167676d8220688b99db36e",
+      "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}"
+    }
+  },
+  "searchCutoffMs": null
 }
(3 file diffs suppressed because they are too large)
@@ -0,0 +1,780 @@
+---
+source: dump/src/reader/mod.rs
+expression: document
+---
+{
+  "id": "e0",
+  "desc": "overriden vector",
+  "_vectors": {
+    "default": [
+      0.1,
+      (... 768 entries of 0.1 in total; repetition elided ...)
+    ]
+  }
+}
@@ -27,6 +27,7 @@ pub enum Condition<'a> {
     LowerThanOrEqual(Token<'a>),
     Between { from: Token<'a>, to: Token<'a> },
     Contains { keyword: Token<'a>, word: Token<'a> },
+    StartsWith { keyword: Token<'a>, word: Token<'a> },
 }

 /// condition = value ("==" | ">" ...) value
@@ -121,6 +122,34 @@ pub fn parse_not_contains(input: Span) -> IResult<FilterCondition> {
     ))
 }

+/// starts with = value "STARTS WITH" value
+pub fn parse_starts_with(input: Span) -> IResult<FilterCondition> {
+    let (input, (fid, starts_with, value)) =
+        tuple((parse_value, tag("STARTS WITH"), cut(parse_value)))(input)?;
+    Ok((
+        input,
+        FilterCondition::Condition {
+            fid,
+            op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value },
+        },
+    ))
+}
+
+/// not starts with = value "NOT" WS+ "STARTS WITH" value
+pub fn parse_not_starts_with(input: Span) -> IResult<FilterCondition> {
+    let keyword = tuple((tag("NOT"), multispace1, tag("STARTS WITH")));
+    let (input, (fid, (_not, _spaces, starts_with), value)) =
+        tuple((parse_value, keyword, cut(parse_value)))(input)?;
+
+    Ok((
+        input,
+        FilterCondition::Not(Box::new(FilterCondition::Condition {
+            fid,
+            op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value },
+        })),
+    ))
+}
+
 /// to = value value "TO" WS+ value
 pub fn parse_to(input: Span) -> IResult<FilterCondition> {
     let (input, (key, from, _, _, to)) =
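A self-contained sketch of the combinator shape used by the two parsers above, assuming nom 7 over plain &str (the real functions run on a located Span, reuse the crate's parse_value, and build FilterCondition values; the names below are illustrative):

    use nom::bytes::complete::tag;
    use nom::character::complete::{alphanumeric1, multispace1};
    use nom::combinator::cut;
    use nom::sequence::tuple;
    use nom::IResult;

    // Recognizes `field STARTS WITH word` and yields (field, word).
    fn starts_with(input: &str) -> IResult<&str, (&str, &str)> {
        let (rest, (field, _, _, _, word)) = tuple((
            alphanumeric1,      // the field name
            multispace1,        // whitespace before the keyword
            tag("STARTS WITH"), // the two-word keyword, matched as a single tag
            multispace1,        // whitespace after the keyword
            // `cut` commits to this branch: a missing value becomes a hard
            // failure instead of falling through to the next alternative.
            cut(alphanumeric1),
        ))(input)?;
        Ok((rest, (field, word)))
    }

    fn main() {
        assert_eq!(
            starts_with("subscribers STARTS WITH hel"),
            Ok(("", ("subscribers", "hel")))
        );
    }

The negated variant follows the same pattern but first matches `NOT`, then wraps the resulting condition in FilterCondition::Not, which is what lets `NOT STARTS WITH` compose cleanly with an outer `NOT`.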
@@ -146,7 +146,7 @@ impl<'a> Display for Error<'a> {
             }
             ErrorKind::InvalidPrimary => {
                 let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
-                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
+                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
             }
             ErrorKind::InvalidEscapedNumber => {
                 writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?
@@ -49,7 +49,7 @@ use std::fmt::Debug;
 pub use condition::{parse_condition, parse_to, Condition};
 use condition::{
     parse_contains, parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null,
-    parse_is_null, parse_not_contains, parse_not_exists,
+    parse_is_null, parse_not_contains, parse_not_exists, parse_not_starts_with, parse_starts_with,
 };
 use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
 pub use error::{Error, ErrorKind};
@@ -166,7 +166,8 @@ impl<'a> FilterCondition<'a> {
                 | Condition::LowerThan(_)
                 | Condition::LowerThanOrEqual(_)
                 | Condition::Between { .. } => None,
-                Condition::Contains { keyword, word: _ } => Some(keyword),
+                Condition::Contains { keyword, word: _ }
+                | Condition::StartsWith { keyword, word: _ } => Some(keyword),
             },
             FilterCondition::Not(this) => this.use_contains_operator(),
             FilterCondition::Or(seq) | FilterCondition::And(seq) => {
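For context, use_contains_operator walks the filter tree and returns the keyword token of the first CONTAINS or, after this change, STARTS WITH it finds, so the caller can point an error at the exact keyword (these operators sit behind Meilisearch's experimental contains-filter feature, as far as the hunk suggests). A reduced sketch of that traversal over an illustrative AST, not the crate's types:

    // Illustrative filter tree; the real code uses FilterCondition/Condition with Tokens.
    enum Node {
        Gated { keyword: String }, // stands in for Contains / StartsWith
        Plain,                     // any non-gated comparison
        Not(Box<Node>),
        Seq(Vec<Node>),            // stands in for And / Or
    }

    // Depth-first search for the first gated operator, mirroring the match above.
    fn first_gated_keyword(n: &Node) -> Option<&str> {
        match n {
            Node::Gated { keyword } => Some(keyword),
            Node::Plain => None,
            Node::Not(inner) => first_gated_keyword(inner),
            Node::Seq(seq) => seq.iter().find_map(first_gated_keyword),
        }
    }

    fn main() {
        let tree = Node::Seq(vec![
            Node::Plain,
            Node::Not(Box::new(Node::Gated { keyword: "STARTS WITH".into() })),
        ]);
        assert_eq!(first_gated_keyword(&tree), Some("STARTS WITH"));
    }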
@@ -484,6 +485,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
         parse_to,
         parse_contains,
         parse_not_contains,
+        parse_starts_with,
+        parse_not_starts_with,
         // the next lines are only for error handling and are written at the end to have the less possible performance impact
         parse_geo,
         parse_geo_distance,
@@ -567,6 +570,7 @@ impl<'a> std::fmt::Display for Condition<'a> {
             Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
             Condition::Between { from, to } => write!(f, "{from} TO {to}"),
             Condition::Contains { word, keyword: _ } => write!(f, "CONTAINS {word}"),
+            Condition::StartsWith { word, keyword: _ } => write!(f, "STARTS WITH {word}"),
         }
     }
 }
@@ -680,6 +684,13 @@ pub mod tests {
         insta::assert_snapshot!(p("NOT subscribers NOT CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}");
         insta::assert_snapshot!(p("subscribers NOT CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})");

+        // Test STARTS WITH + NOT STARTS WITH
+        insta::assert_snapshot!(p("subscribers STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}");
+        insta::assert_snapshot!(p("NOT subscribers STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})");
+        insta::assert_snapshot!(p("subscribers NOT STARTS WITH hel"), @"NOT ({subscribers} STARTS WITH {hel})");
+        insta::assert_snapshot!(p("NOT subscribers NOT STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}");
+        insta::assert_snapshot!(p("subscribers NOT STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})");
+
         // Test nested NOT
         insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
         insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}");
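The snapshots above depend on negation being structural: `NOT STARTS WITH` parses to Not(StartsWith ...), and stacking another `NOT` on top cancels it, which is why `NOT subscribers NOT STARTS WITH 'hel'` prints as a plain STARTS WITH. A minimal sketch of that kind of simplification over an illustrative AST (not the crate's types):

    #[derive(Debug, PartialEq)]
    enum Filter {
        StartsWith { field: String, word: String },
        Not(Box<Filter>),
    }

    // Collapse NOT(NOT(x)) to x, recursively.
    fn simplify(f: Filter) -> Filter {
        match f {
            Filter::Not(inner) => match simplify(*inner) {
                Filter::Not(x) => *x,
                other => Filter::Not(Box::new(other)),
            },
            other => other,
        }
    }

    fn main() {
        let starts = |field: &str, word: &str| Filter::StartsWith {
            field: field.into(),
            word: word.into(),
        };
        // NOT subscribers NOT STARTS WITH 'hel'  =>  subscribers STARTS WITH hel
        let parsed = Filter::Not(Box::new(Filter::Not(Box::new(starts("subscribers", "hel")))));
        assert_eq!(simplify(parsed), starts("subscribers", "hel"));
    }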
@@ -751,7 +762,7 @@ pub mod tests {
         "###);

         insta::assert_snapshot!(p("'OR'"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
         1:5 'OR'
         "###);

@@ -761,12 +772,12 @@ pub mod tests {
         "###);

         insta::assert_snapshot!(p("channel Ponce"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
         1:14 channel Ponce
         "###);

         insta::assert_snapshot!(p("channel = Ponce OR"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
         19:19 channel = Ponce OR
         "###);

@@ -851,12 +862,12 @@ pub mod tests {
         "###);

         insta::assert_snapshot!(p("colour NOT EXIST"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
         1:17 colour NOT EXIST
         "###);

         insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
         1:23 subscribers 100 TO1000
         "###);

@ -919,35 +930,35 @@ pub mod tests {
|
|||||||
"###);
|
"###);
|
||||||
|
|
||||||
insta::assert_snapshot!(p(r#"value NULL"#), @r###"
|
insta::assert_snapshot!(p(r#"value NULL"#), @r###"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
|
||||||
1:11 value NULL
|
1:11 value NULL
|
||||||
"###);
|
"###);
|
||||||
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
|
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
|
||||||
1:15 value NOT NULL
|
1:15 value NOT NULL
|
||||||
"###);
|
"###);
|
||||||
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
|
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
|
||||||
1:12 value EMPTY
|
1:12 value EMPTY
|
||||||
"###);
|
"###);
|
||||||
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
|
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
|
||||||
1:16 value NOT EMPTY
|
1:16 value NOT EMPTY
|
||||||
"###);
|
"###);
|
||||||
insta::assert_snapshot!(p(r#"value IS"#), @r###"
|
insta::assert_snapshot!(p(r#"value IS"#), @r###"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
|
||||||
1:9 value IS
|
1:9 value IS
|
||||||
"###);
|
"###);
|
||||||
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
|
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
|
||||||
1:13 value IS NOT
|
1:13 value IS NOT
|
||||||
"###);
|
"###);
|
||||||
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
|
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
|
||||||
1:16 value IS EXISTS
|
1:16 value IS EXISTS
|
||||||
"###);
|
"###);
|
||||||
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
|
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
|
||||||
1:20 value IS NOT EXISTS
|
1:20 value IS NOT EXISTS
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
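
The snapshots above pin down the full operator list the filter parser advertises when it fails, so introducing `STARTS WITH` / `NOT STARTS WITH` means every expectation message gains two entries. Below is a minimal, self-contained sketch of that message format; the `OPERATORS` constant and the helper are ours, not the crate's actual internals:

```rust
// Hypothetical reconstruction of the error message asserted by the snapshots above.
const OPERATORS: &[&str] = &[
    "=", "!=", ">=", ">", "<=", "<", "IN", "NOT IN", "TO", "EXISTS",
    "NOT EXISTS", "IS NULL", "IS NOT NULL", "IS EMPTY", "IS NOT EMPTY",
    "CONTAINS", "NOT CONTAINS", "STARTS WITH", "NOT STARTS WITH",
    "_geoRadius", "_geoBoundingBox",
];

fn expectation_message(input: &str) -> String {
    // Quote every operator, then join with ", " and an Oxford "or" before the last.
    let quoted: Vec<String> = OPERATORS.iter().map(|op| format!("`{op}`")).collect();
    let (last, rest) = quoted.split_last().expect("OPERATORS is non-empty");
    format!("Was expecting an operation {}, or {} at `{input}`.", rest.join(", "), last)
}

fn main() {
    // Reproduces the updated snapshot for the `value NULL` input.
    println!("{}", expectation_message("value NULL"));
}
```
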
@@ -212,6 +212,8 @@ fn is_keyword(s: &str) -> bool {
 | "NULL"
 | "EMPTY"
 | "CONTAINS"
+| "STARTS"
+| "WITH"
 | "_geoRadius"
 | "_geoBoundingBox"
 )
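
The hunk reserves the two tokens of the new operator separately: because the operator is spelled as two words, both `STARTS` and `WITH` must be rejected as attribute or value names. A sketch of the resulting check; the arms shown in the hunk are verbatim, but the earlier alternatives are elided in the diff and guessed here:

```rust
// Sketch only: keywords before "NULL" are assumed, not confirmed by the hunk.
fn is_keyword(s: &str) -> bool {
    matches!(
        s,
        "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "IS"
            | "NULL" | "EMPTY" | "CONTAINS" | "STARTS" | "WITH"
            | "_geoRadius" | "_geoBoundingBox"
    )
}

fn main() {
    assert!(is_keyword("STARTS") && is_keyword("WITH"));
    assert!(!is_keyword("doggo"));
}
```
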
@@ -41,7 +41,7 @@ ureq = "2.10.0"
 uuid = { version = "1.10.0", features = ["serde", "v4"] }

 [dev-dependencies]
-arroy = "0.4.0"
+arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
 big_s = "1.0.2"
 crossbeam = "0.8.4"
 insta = { version = "1.39.0", features = ["json", "redactions"] }
@@ -25,8 +25,9 @@ enum AutobatchKind {
 primary_key: Option<String>,
 },
 DocumentEdition,
-DocumentDeletion,
-DocumentDeletionByFilter,
+DocumentDeletion {
+by_filter: bool,
+},
 DocumentClear,
 Settings {
 allow_index_creation: bool,
@@ -65,10 +66,12 @@ impl From<KindWithContent> for AutobatchKind {
 ..
 } => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key },
 KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition,
-KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion,
+KindWithContent::DocumentDeletion { .. } => {
+AutobatchKind::DocumentDeletion { by_filter: false }
+}
 KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
 KindWithContent::DocumentDeletionByFilter { .. } => {
-AutobatchKind::DocumentDeletionByFilter
+AutobatchKind::DocumentDeletion { by_filter: true }
 }
 KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => {
 AutobatchKind::Settings {
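
These two hunks collapse the separate `DocumentDeletion` and `DocumentDeletionByFilter` autobatcher kinds into one variant distinguished by a flag. A self-contained sketch of that mapping; `TaskKind` stands in for `KindWithContent` and is ours:

```rust
// Sketch of the merge performed above: one autobatcher kind for both
// deletion task kinds, distinguished only by `by_filter`.
#[derive(Debug, PartialEq)]
enum AutobatchKind {
    DocumentDeletion { by_filter: bool },
}

enum TaskKind {
    DocumentDeletion,
    DocumentDeletionByFilter,
}

impl From<TaskKind> for AutobatchKind {
    fn from(kind: TaskKind) -> Self {
        match kind {
            TaskKind::DocumentDeletion => AutobatchKind::DocumentDeletion { by_filter: false },
            TaskKind::DocumentDeletionByFilter => {
                AutobatchKind::DocumentDeletion { by_filter: true }
            }
        }
    }
}

fn main() {
    assert_eq!(
        AutobatchKind::from(TaskKind::DocumentDeletionByFilter),
        AutobatchKind::DocumentDeletion { by_filter: true }
    );
}
```
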
@@ -105,9 +108,7 @@ pub enum BatchKind {
 },
 DocumentDeletion {
 deletion_ids: Vec<TaskId>,
-},
-DocumentDeletionByFilter {
-id: TaskId,
+includes_by_filter: bool,
 },
 ClearAndSettings {
 other: Vec<TaskId>,
@@ -205,12 +206,13 @@ impl BatchKind {
 allow_index_creation,
 ),
 K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false),
-K::DocumentDeletion => {
-(Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false)
-}
-K::DocumentDeletionByFilter => {
-(Break(BatchKind::DocumentDeletionByFilter { id: task_id }), false)
-}
+K::DocumentDeletion { by_filter: includes_by_filter } => (
+Continue(BatchKind::DocumentDeletion {
+deletion_ids: vec![task_id],
+includes_by_filter,
+}),
+false,
+),
 K::Settings { allow_index_creation } => (
 Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }),
 allow_index_creation,
@@ -228,7 +230,7 @@ impl BatchKind {

 match (self, kind) {
 // We don't batch any of these operations
-(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition | K::DocumentDeletionByFilter) => Break(this),
+(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break(this),
 // We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
 (this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
 Break(this)
@@ -264,7 +266,7 @@ impl BatchKind {
 // The index deletion can batch with everything but must stop after
 (
 BatchKind::DocumentClear { mut ids }
-| BatchKind::DocumentDeletion { deletion_ids: mut ids }
+| BatchKind::DocumentDeletion { deletion_ids: mut ids, includes_by_filter: _ }
 | BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, operation_ids: mut ids }
 | BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids },
 K::IndexDeletion,
@@ -284,7 +286,7 @@ impl BatchKind {

 (
 BatchKind::DocumentClear { mut ids },
-K::DocumentClear | K::DocumentDeletion,
+K::DocumentClear | K::DocumentDeletion { by_filter: _ },
 ) => {
 ids.push(id);
 Continue(BatchKind::DocumentClear { ids })
@@ -328,7 +330,7 @@ impl BatchKind {
 }
 (
 BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids },
-K::DocumentDeletion,
+K::DocumentDeletion { by_filter: false },
 ) => {
 operation_ids.push(id);

@@ -339,6 +341,13 @@ impl BatchKind {
 operation_ids,
 })
 }
+// We can't batch a document operation with a delete by filter
+(
+this @ BatchKind::DocumentOperation { .. },
+K::DocumentDeletion { by_filter: true },
+) => {
+Break(this)
+}
 // but we can't autobatch documents if it's not the same kind
 // this match branch MUST be AFTER the previous one
 (
@@ -357,13 +366,18 @@ impl BatchKind {
 operation_ids,
 }),

-(BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentClear) => {
+(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => {
 deletion_ids.push(id);
 Continue(BatchKind::DocumentClear { ids: deletion_ids })
 }
+// we can't autobatch the deletion and import if the document deletion contained a filter
+(
+this @ BatchKind::DocumentDeletion { deletion_ids: _, includes_by_filter: true },
+K::DocumentImport { .. }
+) => Break(this),
 // we can autobatch the deletion and import if the index already exists
 (
-BatchKind::DocumentDeletion { mut deletion_ids },
+BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
 K::DocumentImport { method, allow_index_creation, primary_key }
 ) if index_already_exists => {
 deletion_ids.push(id);
@@ -377,7 +391,7 @@ impl BatchKind {
 }
 // we can autobatch the deletion and import if both can't create an index
 (
-BatchKind::DocumentDeletion { mut deletion_ids },
+BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
 K::DocumentImport { method, allow_index_creation, primary_key }
 ) if !allow_index_creation => {
 deletion_ids.push(id);
@@ -396,9 +410,9 @@ impl BatchKind {
 ) => {
 Break(this)
 }
-(BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentDeletion) => {
+(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter }, K::DocumentDeletion { by_filter }) => {
 deletion_ids.push(id);
-Continue(BatchKind::DocumentDeletion { deletion_ids })
+Continue(BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: includes_by_filter | by_filter })
 }
 (this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. }) => Break(this),

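
The accumulation rule above is the crux of the merge: when two deletion tasks batch together, the batch's `includes_by_filter` flag is the OR of both sides, so a single filtered deletion taints the whole batch. A self-contained sketch (types ours) of exactly that folding:

```rust
// Mirrors `includes_by_filter: includes_by_filter | by_filter` from the hunk above.
#[derive(Debug)]
struct DocumentDeletionBatch {
    deletion_ids: Vec<u32>,
    includes_by_filter: bool,
}

fn accumulate(mut batch: DocumentDeletionBatch, id: u32, by_filter: bool) -> DocumentDeletionBatch {
    batch.deletion_ids.push(id);
    // One filtered deletion is enough to mark the whole batch as filtered.
    batch.includes_by_filter |= by_filter;
    batch
}

fn main() {
    let batch = DocumentDeletionBatch { deletion_ids: vec![0], includes_by_filter: false };
    let batch = accumulate(batch, 1, true);
    assert!(batch.includes_by_filter);
    println!("{batch:?}");
}
```
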
@@ -412,7 +426,7 @@ impl BatchKind {
 }),
 (
 this @ BatchKind::Settings { .. },
-K::DocumentImport { .. } | K::DocumentDeletion,
+K::DocumentImport { .. } | K::DocumentDeletion { .. },
 ) => Break(this),
 (
 BatchKind::Settings { mut settings_ids, allow_index_creation },
@@ -443,7 +457,7 @@ impl BatchKind {
 settings_ids,
 allow_index_creation,
 },
-K::DocumentDeletion,
+K::DocumentDeletion { .. },
 ) => {
 other.push(id);
 Continue(BatchKind::ClearAndSettings {
@@ -505,7 +519,7 @@ impl BatchKind {
 // this MUST be AFTER the two previous branch
 (
 this @ BatchKind::SettingsAndDocumentOperation { .. },
-K::DocumentDeletion | K::DocumentImport { .. },
+K::DocumentDeletion { .. } | K::DocumentImport { .. },
 ) => Break(this),
 (
 BatchKind::SettingsAndDocumentOperation { mut settings_ids, method, allow_index_creation,primary_key, operation_ids },
@@ -525,8 +539,7 @@ impl BatchKind {
 | BatchKind::IndexDeletion { .. }
 | BatchKind::IndexUpdate { .. }
 | BatchKind::IndexSwap { .. }
-| BatchKind::DocumentEdition { .. }
-| BatchKind::DocumentDeletionByFilter { .. },
+| BatchKind::DocumentEdition { .. },
 _,
 ) => {
 unreachable!()
@@ -616,6 +629,13 @@ mod tests {
 }
 }

+fn doc_del_fil() -> KindWithContent {
+KindWithContent::DocumentDeletionByFilter {
+index_uid: String::from("doggo"),
+filter_expr: serde_json::json!("cuteness > 100"),
+}
+}
+
 fn doc_clr() -> KindWithContent {
 KindWithContent::DocumentClear { index_uid: String::from("doggo") }
 }
@@ -676,10 +696,16 @@ mod tests {
 debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");

 // we can autobatch one or multiple DocumentDeletion together
-debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2] }, false))");
-debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2] }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
+debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
+debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
+
+// we can autobatch one or multiple DocumentDeletionByFilter together
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
+debug_snapshot!(autobatch_from(false,None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
+debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");

 // we can autobatch one or multiple Settings together
 debug_snapshot!(autobatch_from(true, None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
@@ -722,25 +748,63 @@ mod tests {
 debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
 debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
 debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
+
+// But we can't autobatch document addition with document deletion by filter
+debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
+debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
+debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
+debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
+debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
+debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
+debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
+debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
+debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
+debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
+// And the other way around
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
+debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
+debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
+debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
+debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
 }

 #[test]
 fn simple_document_operation_dont_autobatch_with_other() {
-// addition, updates and deletion can't batch together
+// addition, updates and deletion by filter can't batch together
 debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
 debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");

 debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
 debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");

 debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
 debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");

 debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
 debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
 }
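
These snapshots encode an asymmetry worth spelling out: an import followed by a delete-by-filter breaks the batch, and so does the reverse, presumably because the filter has to be evaluated against an index state that the earlier additions in the same batch have not produced yet. A toy model (types ours, not the scheduler's) of the decision being pinned down:

```rust
// Toy model of the batching decision asserted by the snapshots above.
#[derive(Debug, PartialEq)]
enum Decision {
    Continue,
    Break,
}

fn import_then_deletion(by_filter: bool) -> Decision {
    // A plain deletion may join a document operation; a filtered one may not.
    if by_filter {
        Decision::Break
    } else {
        Decision::Continue
    }
}

fn main() {
    assert_eq!(import_then_deletion(false), Decision::Continue);
    assert_eq!(import_then_deletion(true), Decision::Break);
}
```
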

 #[test]
@@ -807,6 +871,7 @@ mod tests {
 debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
 debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
 debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
+debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
 debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
 debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
 debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
@@ -816,6 +881,7 @@ mod tests {
 debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
 debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
 debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
+debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
 debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
 debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
 debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
@@ -827,6 +893,7 @@ mod tests {
 debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
 debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
 debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
+debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
 debug_snapshot!(autobatch_from(true, None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
 debug_snapshot!(autobatch_from(true, None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
 debug_snapshot!(autobatch_from(true, None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
@@ -836,6 +903,7 @@ mod tests {
 debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
 debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
 debug_snapshot!(autobatch_from(false,None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
+debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
 debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
 debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
 debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
@@ -901,10 +969,10 @@ mod tests {
 debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");

 // batch deletion and addition
-debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
+debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
+debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
+debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
+debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
 }

 #[test]
@@ -113,9 +113,9 @@ pub(crate) enum IndexOperation {
 index_uid: String,
 task: Task,
 },
-IndexDocumentDeletionByFilter {
+DocumentDeletion {
 index_uid: String,
-task: Task,
+tasks: Vec<Task>,
 },
 DocumentClear {
 index_uid: String,
@@ -168,11 +168,11 @@ impl Batch {
 Batch::IndexOperation { op, .. } => match op {
 IndexOperation::DocumentOperation { tasks, .. }
 | IndexOperation::Settings { tasks, .. }
+| IndexOperation::DocumentDeletion { tasks, .. }
 | IndexOperation::DocumentClear { tasks, .. } => {
 RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
 }
-IndexOperation::DocumentEdition { task, .. }
-| IndexOperation::IndexDocumentDeletionByFilter { task, .. } => {
+IndexOperation::DocumentEdition { task, .. } => {
 RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
 }
 IndexOperation::SettingsAndDocumentOperation {
@@ -237,7 +237,7 @@ impl IndexOperation {
 match self {
 IndexOperation::DocumentOperation { index_uid, .. }
 | IndexOperation::DocumentEdition { index_uid, .. }
-| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
+| IndexOperation::DocumentDeletion { index_uid, .. }
 | IndexOperation::DocumentClear { index_uid, .. }
 | IndexOperation::Settings { index_uid, .. }
 | IndexOperation::DocumentClearAndSetting { index_uid, .. }
@@ -255,8 +255,8 @@ impl fmt::Display for IndexOperation {
 IndexOperation::DocumentEdition { .. } => {
 f.write_str("IndexOperation::DocumentEdition")
 }
-IndexOperation::IndexDocumentDeletionByFilter { .. } => {
-f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
+IndexOperation::DocumentDeletion { .. } => {
+f.write_str("IndexOperation::DocumentDeletion")
 }
 IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
 IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
@@ -292,21 +292,6 @@ impl IndexScheduler {
 },
 must_create_index,
 })),
-BatchKind::DocumentDeletionByFilter { id } => {
-let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
-match &task.kind {
-KindWithContent::DocumentDeletionByFilter { index_uid, .. } => {
-Ok(Some(Batch::IndexOperation {
-op: IndexOperation::IndexDocumentDeletionByFilter {
-index_uid: index_uid.clone(),
-task,
-},
-must_create_index: false,
-}))
-}
-_ => unreachable!(),
-}
-}
 BatchKind::DocumentEdition { id } => {
 let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
 match &task.kind {
@@ -369,30 +354,11 @@ impl IndexScheduler {
 must_create_index,
 }))
 }
-BatchKind::DocumentDeletion { deletion_ids } => {
+BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: _ } => {
 let tasks = self.get_existing_tasks(rtxn, deletion_ids)?;

-let mut operations = Vec::with_capacity(tasks.len());
-let mut documents_counts = Vec::with_capacity(tasks.len());
-for task in &tasks {
-match task.kind {
-KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
-operations.push(DocumentOperation::Delete(documents_ids.clone()));
-documents_counts.push(documents_ids.len() as u64);
-}
-_ => unreachable!(),
-}
-}
-
 Ok(Some(Batch::IndexOperation {
-op: IndexOperation::DocumentOperation {
-index_uid,
-primary_key: None,
-method: IndexDocumentsMethod::ReplaceDocuments,
-documents_counts,
-operations,
-tasks,
-},
+op: IndexOperation::DocumentDeletion { index_uid, tasks },
 must_create_index,
 }))
 }
@@ -1430,7 +1396,7 @@ impl IndexScheduler {
 {
 (original_filter, context, function)
 } else {
-// In the case of a `documentDeleteByFilter` the details MUST be set
+// In the case of a `documentEdition` the details MUST be set
 unreachable!();
 };

@@ -1460,52 +1426,102 @@ impl IndexScheduler {

 Ok(vec![task])
 }
-IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
-let filter =
-if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
-&task.kind
-{
-filter_expr
-} else {
-unreachable!()
-};
-let deleted_documents = delete_document_by_filter(
-index_wtxn,
-filter,
-self.index_mapper.indexer_config(),
-self.must_stop_processing.clone(),
-index,
-);
-let original_filter = if let Some(Details::DocumentDeletionByFilter {
-original_filter,
-deleted_documents: _,
-}) = task.details
-{
-original_filter
-} else {
-// In the case of a `documentDeleteByFilter` the details MUST be set
-unreachable!();
-};
+IndexOperation::DocumentDeletion { mut tasks, index_uid: _ } => {
+let mut to_delete = RoaringBitmap::new();
+let external_documents_ids = index.external_documents_ids();

-match deleted_documents {
-Ok(deleted_documents) => {
+for task in tasks.iter_mut() {
+let before = to_delete.len();
 task.status = Status::Succeeded;
-task.details = Some(Details::DocumentDeletionByFilter {
-original_filter,
-deleted_documents: Some(deleted_documents),
-});
-}
-Err(e) => {
-task.status = Status::Failed;
-task.details = Some(Details::DocumentDeletionByFilter {
-original_filter,
-deleted_documents: Some(0),
-});
-task.error = Some(e.into());
+match &task.kind {
+KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
+for id in documents_ids {
+if let Some(id) = external_documents_ids.get(index_wtxn, id)? {
+to_delete.insert(id);
+}
+}
+let will_be_removed = to_delete.len() - before;
+task.details = Some(Details::DocumentDeletion {
+provided_ids: documents_ids.len(),
+deleted_documents: Some(will_be_removed),
+});
+}
+KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => {
+let before = to_delete.len();
+let filter = match Filter::from_json(filter_expr) {
+Ok(filter) => filter,
+Err(err) => {
+// theorically, this should be catched by deserr before reaching the index-scheduler and cannot happens
+task.status = Status::Failed;
+task.error = match err {
+milli::Error::UserError(
+milli::UserError::InvalidFilterExpression { .. },
+) => Some(
+Error::from(err)
+.with_custom_error_code(Code::InvalidDocumentFilter)
+.into(),
+),
+e => Some(e.into()),
+};
+None
+}
+};
+if let Some(filter) = filter {
+let candidates =
+filter.evaluate(index_wtxn, index).map_err(|err| match err {
+milli::Error::UserError(
+milli::UserError::InvalidFilter(_),
+) => Error::from(err)
+.with_custom_error_code(Code::InvalidDocumentFilter),
+e => e.into(),
+});
+match candidates {
+Ok(candidates) => to_delete |= candidates,
+Err(err) => {
+task.status = Status::Failed;
+task.error = Some(err.into());
+}
+};
+}
+let will_be_removed = to_delete.len() - before;
+if let Some(Details::DocumentDeletionByFilter {
+original_filter: _,
+deleted_documents,
+}) = &mut task.details
+{
+*deleted_documents = Some(will_be_removed);
+} else {
+// In the case of a `documentDeleteByFilter` the details MUST be set
+unreachable!()
+}
+}
+_ => unreachable!(),
 }
 }

-Ok(vec![task])
+let config = IndexDocumentsConfig {
+update_method: IndexDocumentsMethod::ReplaceDocuments,
+..Default::default()
+};
+
+let must_stop_processing = self.must_stop_processing.clone();
+let mut builder = milli::update::IndexDocuments::new(
+index_wtxn,
+index,
+self.index_mapper.indexer_config(),
+config,
+|indexing_step| tracing::debug!(update = ?indexing_step),
+|| must_stop_processing.get(),
+)?;
+
+let (new_builder, _count) =
+builder.remove_documents_from_db_no_batch(&to_delete)?;
+builder = new_builder;
+
+let _ = builder.execute()?;
+
+Ok(tasks)
 }
 IndexOperation::Settings { index_uid: _, settings, mut tasks } => {
 let indexer_config = self.index_mapper.indexer_config();
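
The rewrite above replaces a one-task-at-a-time code path with a single pass: every deletion task in the batch contributes internal document ids to one `RoaringBitmap` (explicit ids via an external-id lookup, filters via candidate evaluation), and the union is removed from the database once. A self-contained sketch of that accumulation, using the `roaring` crate as the diff does; the literal ids stand in for the lookup and the filter result:

```rust
use roaring::RoaringBitmap;

fn main() {
    let mut to_delete = RoaringBitmap::new();

    // Task 1: deletion by explicit ids (already resolved to internal ids here).
    for id in [1u32, 2, 3] {
        to_delete.insert(id);
    }

    // Task 2: deletion by filter; `candidates` stands in for `filter.evaluate(..)`.
    let candidates = RoaringBitmap::from_iter([3u32, 4, 5]);
    let before = to_delete.len();
    to_delete |= candidates;
    // Per-task accounting, as in `will_be_removed = to_delete.len() - before`.
    let deleted_by_filter = to_delete.len() - before;

    assert_eq!(deleted_by_filter, 2); // id 3 was already scheduled for deletion
    assert_eq!(to_delete.len(), 5);
}
```

A nice property of this design is that overlapping tasks are naturally deduplicated by the bitmap union, and a failing filter only marks its own task as failed without aborting the rest of the batch.
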
@@ -1709,46 +1725,6 @@ impl IndexScheduler {
 }
 }

-fn delete_document_by_filter<'a>(
-wtxn: &mut RwTxn<'a>,
-filter: &serde_json::Value,
-indexer_config: &IndexerConfig,
-must_stop_processing: MustStopProcessing,
-index: &'a Index,
-) -> Result<u64> {
-let filter = Filter::from_json(filter)?;
-Ok(if let Some(filter) = filter {
-let candidates = filter.evaluate(wtxn, index).map_err(|err| match err {
-milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
-Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
-}
-e => e.into(),
-})?;
-
-let config = IndexDocumentsConfig {
-update_method: IndexDocumentsMethod::ReplaceDocuments,
-..Default::default()
-};
-
-let mut builder = milli::update::IndexDocuments::new(
-wtxn,
-index,
-indexer_config,
-config,
-|indexing_step| tracing::debug!(update = ?indexing_step),
-|| must_stop_processing.get(),
-)?;
-
-let (new_builder, count) = builder.remove_documents_from_db_no_batch(&candidates)?;
-builder = new_builder;
-
-let _ = builder.execute()?;
-count
-} else {
-0
-})
-}
-
 fn edit_documents_by_function<'a>(
 wtxn: &mut RwTxn<'a>,
 filter: &Option<serde_json::Value>,
@@ -87,7 +87,7 @@ impl RoFeatures {
 Ok(())
 } else {
 Err(FeatureNotEnabledError {
-disabled_action: "Using `CONTAINS` in a filter",
+disabled_action: "Using `CONTAINS` or `STARTS WITH` in a filter",
 feature: "contains filter",
 issue_link: "https://github.com/orgs/meilisearch/discussions/763",
 }
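
Only the error wording changes here: `STARTS WITH` rides on the same experimental feature flag that already gates `CONTAINS`, so the refusal message must name both operators. A sketch (names ours, not the scheduler's types) of a gate with that shape:

```rust
// Minimal sketch of a shared feature gate for CONTAINS and STARTS WITH.
struct RoFeatures {
    contains_filter: bool,
}

impl RoFeatures {
    fn check_contains_filter(&self) -> Result<(), String> {
        if self.contains_filter {
            Ok(())
        } else {
            Err("Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the experimental contains filter feature".to_string())
        }
    }
}

fn main() {
    let off = RoFeatures { contains_filter: false };
    assert!(off.check_contains_filter().is_err());
}
```
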
@@ -1477,7 +1477,7 @@ impl IndexScheduler {
 .map(
 |IndexEmbeddingConfig {
 name,
-config: milli::vector::EmbeddingConfig { embedder_options, prompt },
+config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
 ..
 }| {
 let prompt =
@@ -1486,7 +1486,10 @@ impl IndexScheduler {
 {
 let embedders = self.embedders.read().unwrap();
 if let Some(embedder) = embedders.get(&embedder_options) {
-return Ok((name, (embedder.clone(), prompt)));
+return Ok((
+name,
+(embedder.clone(), prompt, quantized.unwrap_or_default()),
+));
 }
 }

@ -1500,7 +1503,7 @@ impl IndexScheduler {
|
|||||||
let mut embedders = self.embedders.write().unwrap();
|
let mut embedders = self.embedders.write().unwrap();
|
||||||
embedders.insert(embedder_options, embedder.clone());
|
embedders.insert(embedder_options, embedder.clone());
|
||||||
}
|
}
|
||||||
Ok((name, (embedder, prompt)))
|
Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.collect();
|
.collect();
|
||||||
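These three hunks thread a new quantized flag, an Option<bool> on the stored config defaulted to false, through the embedder cache, so each resolved entry is now a triple instead of a pair. A reduced sketch of that cache shape, using simplified hypothetical types (strings instead of the real option structs and Arc<Embedder>):

use std::collections::HashMap;
use std::sync::{Arc, RwLock};

type EmbedderOptions = String;
type Embedder = String;

struct Cache {
    embedders: RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>,
}

impl Cache {
    // Resolve (embedder, prompt, quantized); `quantized: Option<bool>` comes
    // from the persisted config and defaults to false when unset.
    fn resolve(
        &self,
        options: EmbedderOptions,
        prompt: String,
        quantized: Option<bool>,
    ) -> (Arc<Embedder>, String, bool) {
        if let Some(embedder) = self.embedders.read().unwrap().get(&options) {
            return (embedder.clone(), prompt, quantized.unwrap_or_default());
        }
        // Cache miss: build the embedder once, then memoize it.
        let embedder = Arc::new(format!("built from {options}"));
        self.embedders.write().unwrap().insert(options, embedder.clone());
        (embedder, prompt, quantized.unwrap_or_default())
    }
}

fn main() {
    let cache = Cache { embedders: RwLock::new(HashMap::new()) };
    let (_, _, quantized) = cache.resolve("rest".into(), "{{doc.doggo}}".into(), None);
    assert!(!quantized); // unset means not quantized
}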
@ -1764,6 +1767,7 @@ mod tests {
|
|||||||
use crossbeam::channel::RecvTimeoutError;
|
use crossbeam::channel::RecvTimeoutError;
|
||||||
use file_store::File;
|
use file_store::File;
|
||||||
use insta::assert_json_snapshot;
|
use insta::assert_json_snapshot;
|
||||||
|
use maplit::btreeset;
|
||||||
use meili_snap::{json_string, snapshot};
|
use meili_snap::{json_string, snapshot};
|
||||||
use meilisearch_auth::AuthFilter;
|
use meilisearch_auth::AuthFilter;
|
||||||
use meilisearch_types::document_formats::DocumentFormatError;
|
use meilisearch_types::document_formats::DocumentFormatError;
|
||||||
@ -2553,6 +2557,117 @@ mod tests {
|
|||||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn fail_in_process_batch_for_document_deletion() {
|
||||||
|
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||||
|
|
||||||
|
use meilisearch_types::settings::{Settings, Unchecked};
|
||||||
|
let mut new_settings: Box<Settings<Unchecked>> = Box::default();
|
||||||
|
new_settings.filterable_attributes = Setting::Set(btreeset!(S("catto")));
|
||||||
|
|
||||||
|
index_scheduler
|
||||||
|
.register(
|
||||||
|
KindWithContent::SettingsUpdate {
|
||||||
|
index_uid: S("doggos"),
|
||||||
|
new_settings,
|
||||||
|
is_deletion: false,
|
||||||
|
allow_index_creation: true,
|
||||||
|
},
|
||||||
|
None,
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let content = r#"[
|
||||||
|
{ "id": 1, "doggo": "jean bob" },
|
||||||
|
{ "id": 2, "catto": "jorts" },
|
||||||
|
{ "id": 3, "doggo": "bork" }
|
||||||
|
]"#;
|
||||||
|
|
||||||
|
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
|
||||||
|
let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
|
||||||
|
file.persist().unwrap();
|
||||||
|
index_scheduler
|
||||||
|
.register(
|
||||||
|
KindWithContent::DocumentAdditionOrUpdate {
|
||||||
|
index_uid: S("doggos"),
|
||||||
|
primary_key: Some(S("id")),
|
||||||
|
method: ReplaceDocuments,
|
||||||
|
content_file: uuid,
|
||||||
|
documents_count,
|
||||||
|
allow_index_creation: true,
|
||||||
|
},
|
||||||
|
None,
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_setting_and_document_addition");
|
||||||
|
|
||||||
|
handle.advance_one_successful_batch();
|
||||||
|
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_settings");
|
||||||
|
handle.advance_one_successful_batch();
|
||||||
|
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_documents");
|
||||||
|
|
||||||
|
index_scheduler
|
||||||
|
.register(
|
||||||
|
KindWithContent::DocumentDeletion {
|
||||||
|
index_uid: S("doggos"),
|
||||||
|
documents_ids: vec![S("1")],
|
||||||
|
},
|
||||||
|
None,
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
// This one should not be catched by Meilisearch but it's still nice to handle it because if one day we break the filters it could happens
|
||||||
|
index_scheduler
|
||||||
|
.register(
|
||||||
|
KindWithContent::DocumentDeletionByFilter {
|
||||||
|
index_uid: S("doggos"),
|
||||||
|
filter_expr: serde_json::json!(true),
|
||||||
|
},
|
||||||
|
None,
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
// Should fail because the ids are not filterable
|
||||||
|
index_scheduler
|
||||||
|
.register(
|
||||||
|
KindWithContent::DocumentDeletionByFilter {
|
||||||
|
index_uid: S("doggos"),
|
||||||
|
filter_expr: serde_json::json!("id = 2"),
|
||||||
|
},
|
||||||
|
None,
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
index_scheduler
|
||||||
|
.register(
|
||||||
|
KindWithContent::DocumentDeletionByFilter {
|
||||||
|
index_uid: S("doggos"),
|
||||||
|
filter_expr: serde_json::json!("catto EXISTS"),
|
||||||
|
},
|
||||||
|
None,
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_document_deletions");
|
||||||
|
|
||||||
|
// Everything should be batched together
|
||||||
|
handle.advance_one_successful_batch();
|
||||||
|
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_removing_the_documents");
|
||||||
|
|
||||||
|
let index = index_scheduler.index("doggos").unwrap();
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
|
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||||
|
let documents = index
|
||||||
|
.all_documents(&rtxn)
|
||||||
|
.unwrap()
|
||||||
|
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents_remaining_should_only_be_bork");
|
||||||
|
}
|
||||||
|
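Why json!(true) fails while the string filters proceed: a deletion-by-filter payload must be a string or a (possibly nested) array, and any other JSON type is rejected up front with invalid_document_filter. A small sketch of that shape check; check_filter_shape is a hypothetical helper, not the Meilisearch implementation:

use serde_json::{json, Value};

// Mirrors the error recorded in the snapshot below:
// "Invalid type for filter subexpression: expected: String, Array, found: true."
fn check_filter_shape(expr: &Value) -> Result<(), String> {
    match expr {
        Value::String(_) => Ok(()),
        Value::Array(items) => items.iter().try_for_each(check_filter_shape),
        other => Err(format!(
            "Invalid type for filter subexpression: expected: String, Array, found: {other}."
        )),
    }
}

fn main() {
    assert!(check_filter_shape(&json!("catto EXISTS")).is_ok());
    assert!(check_filter_shape(&json!(["id = 2", "catto EXISTS"])).is_ok());
    assert!(check_filter_shape(&json!(true)).is_err()); // rejected before evaluation
}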

    #[test]
    fn do_not_batch_task_of_different_indexes() {
        let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
@@ -5085,7 +5200,7 @@ mod tests {
        let simple_hf_name = name.clone();

        let configs = index_scheduler.embedders(configs).unwrap();
-        let (hf_embedder, _) = configs.get(&simple_hf_name).unwrap();
+        let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
        let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap();
        let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap();
        let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap();
@@ -5403,7 +5518,11 @@ mod tests {
                ),
                prompt: PromptData {
                    template: "{{doc.doggo}}",
+                    max_bytes: Some(
+                        400,
+                    ),
                },
+                quantized: None,
            },
            user_provided: RoaringBitmap<[1, 2]>,
        },
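The snapshot now records a max_bytes: Some(400) cap on rendered prompts. Enforcing such a cap safely means cutting on a character boundary rather than a raw byte index; a sketch of that truncation, with a hypothetical helper and assuming the cap applies to the UTF-8 byte length of the rendered template:

// Truncate a rendered prompt to at most `max` bytes without splitting a
// UTF-8 character: back up to the last char boundary at or below the cap.
fn truncate_to_bytes(s: &str, max: usize) -> &str {
    if s.len() <= max {
        return s;
    }
    let mut end = max;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}

fn main() {
    let rendered = "kefir the patou best doggo 🐶";
    let capped = truncate_to_bytes(rendered, 28);
    assert!(capped.len() <= 28);
    assert!(capped.starts_with("kefir the patou best doggo"));
}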
@@ -5416,28 +5535,8 @@ mod tests {

        // the document with the id 3 should keep its original embedding
        let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
-        let mut embeddings = Vec::new();
+        let embeddings = index.embeddings(&rtxn, docid).unwrap();
+        let embeddings = &embeddings["my_doggo_embedder"];
-
-        'vectors: for i in 0..=u8::MAX {
-            let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy)
-                .map(Some)
-                .or_else(|e| match e {
-                    arroy::Error::MissingMetadata(_) => Ok(None),
-                    e => Err(e),
-                })
-                .transpose();
-
-            let Some(reader) = reader else {
-                break 'vectors;
-            };
-
-            let embedding = reader.unwrap().item_vector(&rtxn, docid).unwrap();
-            if let Some(embedding) = embedding {
-                embeddings.push(embedding)
-            } else {
-                break 'vectors;
-            }
-        }

        snapshot!(embeddings.len(), @"1");
        assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
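The replacement accessor returns the vectors already grouped per embedder name, so the test can index straight into "my_doggo_embedder" instead of probing arroy readers until one reports missing metadata. A toy model of that return shape, assumed here to behave like a map from embedder name to the document's vectors:

use std::collections::HashMap;

// Assumed shape: embedder name -> this document's embeddings.
fn embeddings_for_doc() -> HashMap<String, Vec<Vec<f32>>> {
    HashMap::from([("my_doggo_embedder".to_string(), vec![vec![3.0; 4]])])
}

fn main() {
    let embeddings = embeddings_for_doc();
    let embeddings = &embeddings["my_doggo_embedder"];
    assert_eq!(embeddings.len(), 1);
    assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
}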
@@ -5617,8 +5716,12 @@ mod tests {
                },
                ),
                prompt: PromptData {
-                    template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
+                    template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
+                    max_bytes: Some(
+                        400,
+                    ),
                },
+                quantized: None,
            },
            user_provided: RoaringBitmap<[0]>,
        },
|
|||||||
},
|
},
|
||||||
),
|
),
|
||||||
prompt: PromptData {
|
prompt: PromptData {
|
||||||
template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
|
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||||
|
max_bytes: Some(
|
||||||
|
400,
|
||||||
|
),
|
||||||
},
|
},
|
||||||
|
quantized: None,
|
||||||
},
|
},
|
||||||
user_provided: RoaringBitmap<[]>,
|
user_provided: RoaringBitmap<[]>,
|
||||||
},
|
},
|
||||||
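The new default template only emits searchable, non-nil fields. A rough Rust rendering of what that Liquid loop does, with a hypothetical Field struct; the real rendering goes through the prompt engine:

struct Field {
    name: &'static str,
    value: Option<&'static str>, // None plays the role of Liquid's nil
    is_searchable: bool,
}

// Mirrors: {% for field in fields %}{% if field.is_searchable and field.value != nil %}
//          {{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}
fn render(fields: &[Field]) -> String {
    let mut out = String::new();
    for field in fields {
        if let (true, Some(value)) = (field.is_searchable, field.value) {
            out.push_str(field.name);
            out.push_str(": ");
            out.push_str(value);
            out.push('\n');
        }
    }
    out
}

fn main() {
    let fields = [
        Field { name: "doggo", value: Some("bork"), is_searchable: true },
        Field { name: "internal", value: Some("x"), is_searchable: false },
        Field { name: "catto", value: None, is_searchable: true },
    ];
    assert_eq!(render(&fields), "doggo: bork\n");
}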
@@ -0,0 +1,44 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} }

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------
@@ -0,0 +1,43 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 0, field_distribution: {} }

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000

----------------------------------------------------------------------
@@ -0,0 +1,43 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 0, field_distribution: {} }

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000

----------------------------------------------------------------------
@@ -0,0 +1,56 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
2 {uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
3 {uid: 3, status: failed, error: ResponseError { code: 200, message: "Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}
4 {uid: 4, status: failed, error: ResponseError { code: 200, message: "Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
5 {uid: 5, status: succeeded, details: { original_filter: "catto EXISTS", deleted_documents: Some(1) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,2,5,]
failed [3,4,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [2,3,4,5,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,2,3,4,5,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
[timestamp] [5,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,3,4,5,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,3,4,5,]
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------
@@ -0,0 +1,9 @@
---
source: index-scheduler/src/lib.rs
---
[
  {
    "id": 3,
    "doggo": "bork"
  }
]
@@ -0,0 +1,53 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_document_ids: 1, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
3 {uid: 3, status: enqueued, details: { original_filter: true, deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}
4 {uid: 4, status: enqueued, details: { original_filter: "id = 2", deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
5 {uid: 5, status: enqueued, details: { original_filter: "catto EXISTS", deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }}
----------------------------------------------------------------------
### Status:
enqueued [2,3,4,5,]
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [2,3,4,5,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,2,3,4,5,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} }

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
[timestamp] [5,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------
@@ -0,0 +1,39 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000

----------------------------------------------------------------------
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------

@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------

@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued [0,]
|
enqueued [0,]
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued []
|
enqueued []
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued [0,]
|
enqueued [0,]
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued []
|
enqueued []
|
||||||
|
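The only change in these snapshot hunks is mechanical: every `EmbeddingSettings` dump gains two fields, `binary_quantized` and `document_template_max_bytes`, both `NotSet` by default. As a hedged illustration of what actually setting them could look like, here is a hypothetical settings payload built with `serde_json::json!`; the camelCase key names mirror the snapshot field names and are our assumption, not part of this diff:

    use serde_json::json;

    // Hypothetical `PATCH /indexes/doggos/settings` body exercising the two
    // new embedder knobs that these snapshots now serialize.
    let settings = json!({
        "embedders": {
            "B_small_hf": {
                "source": "huggingFace",
                "model": "sentence-transformers/all-MiniLM-L6-v2",
                // New: store binary-quantized vectors for this embedder.
                "binaryQuantized": true,
                // New: cap the rendered document template's size, in bytes.
                "documentTemplateMaxBytes": 400
            }
        }
    });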
@@ -66,3 +66,5 @@ khmer = ["milli/khmer"]
 vietnamese = ["milli/vietnamese"]
 # force swedish character recomposition
 swedish-recomposition = ["milli/swedish-recomposition"]
+# force german character recomposition
+german = ["milli/german"]
@@ -238,8 +238,14 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
 InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
 InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
 InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
+InvalidMultiSearchFacets , InvalidRequest , BAD_REQUEST ;
+InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ;
+InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ;
+InvalidMultiSearchMaxValuesPerFacet , InvalidRequest , BAD_REQUEST ;
+InvalidMultiSearchMergeFacets , InvalidRequest , BAD_REQUEST ;
+InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
@@ -388,7 +394,11 @@ impl ErrorCode for milli::Error {
             | UserError::InvalidOpenAiModelDimensionsMax { .. }
             | UserError::InvalidSettingsDimensions { .. }
             | UserError::InvalidUrl { .. }
-            | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
+            | UserError::InvalidSettingsDocumentTemplateMaxBytes { .. }
+            | UserError::InvalidPrompt(_)
+            | UserError::InvalidDisableBinaryQuantization { .. } => {
+                Code::InvalidSettingsEmbedders
+            }
             UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
             UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
             UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
@@ -1,3 +1,4 @@
+use std::borrow::Borrow;
 use std::error::Error;
 use std::fmt;
 use std::str::FromStr;
@@ -8,7 +9,7 @@ use crate::error::{Code, ErrorCode};
 
 /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400
 /// bytes long
-#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
+#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord)]
 #[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)]
 pub struct IndexUid(String);
 
@@ -70,6 +71,12 @@ impl From<IndexUid> for String {
     }
 }
 
+impl Borrow<String> for IndexUid {
+    fn borrow(&self) -> &String {
+        &self.0
+    }
+}
+
 #[derive(Debug)]
 pub struct IndexUidFormatError {
     pub invalid_uid: String,
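The added `Borrow<String>` impl, together with the new `PartialOrd`/`Ord` derives, lets ordered collections keyed by `IndexUid` be queried with a plain `&String` instead of constructing a fresh `IndexUid`. A self-contained sketch of the pattern, using a local stand-in for the real type:

    use std::borrow::Borrow;
    use std::collections::BTreeSet;

    // Standalone stand-in for meilisearch_types::index_uid::IndexUid.
    #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
    struct IndexUid(String);

    impl Borrow<String> for IndexUid {
        fn borrow(&self) -> &String {
            &self.0
        }
    }

    fn main() {
        let mut uids = BTreeSet::new();
        uids.insert(IndexUid("doggos".to_string()));
        // BTreeSet::contains accepts any Q where IndexUid: Borrow<Q> and
        // Q: Ord, so a &String works with no extra allocation or wrapper.
        assert!(uids.contains(&"doggos".to_string()));
    }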
@@ -1,135 +1,6 @@
 use deserr::Deserr;
 use milli::LocalizedAttributesRule;
 use serde::{Deserialize, Serialize};
-use serde_json::json;
-
-/// Generate a Locale enum and its From and Into implementations for milli::tokenizer::Language.
-///
-/// this enum implements `Deserr` in order to be used in the API.
-macro_rules! make_locale {
-
-    ($($language:tt), +) => {
-        #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)]
-        #[deserr(rename_all = camelCase)]
-        #[serde(rename_all = "camelCase")]
-        pub enum Locale {
-            $($language),+,
-        }
-
-        impl From<milli::tokenizer::Language> for Locale {
-            fn from(other: milli::tokenizer::Language) -> Locale {
-                match other {
-                    $(milli::tokenizer::Language::$language => Locale::$language), +
-                }
-            }
-        }
-
-        impl From<Locale> for milli::tokenizer::Language {
-            fn from(other: Locale) -> milli::tokenizer::Language {
-                match other {
-                    $(Locale::$language => milli::tokenizer::Language::$language), +,
-                }
-            }
-        }
-
-        #[derive(Debug)]
-        pub struct LocaleFormatError {
-            pub invalid_locale: String,
-        }
-
-        impl std::fmt::Display for LocaleFormatError {
-            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-                let valid_locales = [$(Locale::$language),+].iter().map(|l| format!("`{}`", json!(l).as_str().unwrap())).collect::<Vec<_>>().join(", ");
-                write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales)
-            }
-        }
-    };
-}
-
-make_locale! {
-    Epo,
-    Eng,
-    Rus,
-    Cmn,
-    Spa,
-    Por,
-    Ita,
-    Ben,
-    Fra,
-    Deu,
-    Ukr,
-    Kat,
-    Ara,
-    Hin,
-    Jpn,
-    Heb,
-    Yid,
-    Pol,
-    Amh,
-    Jav,
-    Kor,
-    Nob,
-    Dan,
-    Swe,
-    Fin,
-    Tur,
-    Nld,
-    Hun,
-    Ces,
-    Ell,
-    Bul,
-    Bel,
-    Mar,
-    Kan,
-    Ron,
-    Slv,
-    Hrv,
-    Srp,
-    Mkd,
-    Lit,
-    Lav,
-    Est,
-    Tam,
-    Vie,
-    Urd,
-    Tha,
-    Guj,
-    Uzb,
-    Pan,
-    Aze,
-    Ind,
-    Tel,
-    Pes,
-    Mal,
-    Ori,
-    Mya,
-    Nep,
-    Sin,
-    Khm,
-    Tuk,
-    Aka,
-    Zul,
-    Sna,
-    Afr,
-    Lat,
-    Slk,
-    Cat,
-    Tgl,
-    Hye,
-    Zho
-}
-
-impl std::error::Error for LocaleFormatError {}
-
-impl std::str::FromStr for Locale {
-    type Err = LocaleFormatError;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        milli::tokenizer::Language::from_code(s)
-            .map(Self::from)
-            .ok_or(LocaleFormatError { invalid_locale: s.to_string() })
-    }
-}
-
 #[derive(Debug, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize)]
 #[deserr(rename_all = camelCase)]
@@ -156,3 +27,140 @@ impl From<LocalizedAttributesRuleView> for LocalizedAttributesRule {
         }
     }
 }
+
+/// Generate a Locale enum and its From and Into implementations for milli::tokenizer::Language.
+///
+/// this enum implements `Deserr` in order to be used in the API.
+macro_rules! make_locale {
+    ($(($iso_639_1:ident, $iso_639_1_str:expr) => ($iso_639_3:ident, $iso_639_3_str:expr),)+) => {
+        #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)]
+        #[deserr(rename_all = camelCase)]
+        #[serde(rename_all = "camelCase")]
+        pub enum Locale {
+            $($iso_639_1,)+
+            $($iso_639_3,)+
+            Cmn,
+        }
+
+        impl From<milli::tokenizer::Language> for Locale {
+            fn from(other: milli::tokenizer::Language) -> Locale {
+                match other {
+                    $(milli::tokenizer::Language::$iso_639_3 => Locale::$iso_639_3,)+
+                    milli::tokenizer::Language::Cmn => Locale::Cmn,
+                }
+            }
+        }
+
+        impl From<Locale> for milli::tokenizer::Language {
+            fn from(other: Locale) -> milli::tokenizer::Language {
+                match other {
+                    $(Locale::$iso_639_1 => milli::tokenizer::Language::$iso_639_3,)+
+                    $(Locale::$iso_639_3 => milli::tokenizer::Language::$iso_639_3,)+
+                    Locale::Cmn => milli::tokenizer::Language::Cmn,
+                }
+            }
+        }
+
+        impl std::str::FromStr for Locale {
+            type Err = LocaleFormatError;
+
+            fn from_str(s: &str) -> Result<Self, Self::Err> {
+                let locale = match s {
+                    $($iso_639_1_str => Locale::$iso_639_1,)+
+                    $($iso_639_3_str => Locale::$iso_639_3,)+
+                    "cmn" => Locale::Cmn,
+                    _ => return Err(LocaleFormatError { invalid_locale: s.to_string() }),
+                };
+
+                Ok(locale)
+            }
+        }
+
+        #[derive(Debug)]
+        pub struct LocaleFormatError {
+            pub invalid_locale: String,
+        }
+
+        impl std::fmt::Display for LocaleFormatError {
+            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                let mut valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+,"cmn"];
+                valid_locales.sort_by(|left, right| left.len().cmp(&right.len()).then(left.cmp(right)));
+                write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales.join(", "))
+            }
+        }
+
+        impl std::error::Error for LocaleFormatError {}
+    };
+}
+
+make_locale!(
+    (Af, "af") => (Afr, "afr"),
+    (Ak, "ak") => (Aka, "aka"),
+    (Am, "am") => (Amh, "amh"),
+    (Ar, "ar") => (Ara, "ara"),
+    (Az, "az") => (Aze, "aze"),
+    (Be, "be") => (Bel, "bel"),
+    (Bn, "bn") => (Ben, "ben"),
+    (Bg, "bg") => (Bul, "bul"),
+    (Ca, "ca") => (Cat, "cat"),
+    (Cs, "cs") => (Ces, "ces"),
+    (Da, "da") => (Dan, "dan"),
+    (De, "de") => (Deu, "deu"),
+    (El, "el") => (Ell, "ell"),
+    (En, "en") => (Eng, "eng"),
+    (Eo, "eo") => (Epo, "epo"),
+    (Et, "et") => (Est, "est"),
+    (Fi, "fi") => (Fin, "fin"),
+    (Fr, "fr") => (Fra, "fra"),
+    (Gu, "gu") => (Guj, "guj"),
+    (He, "he") => (Heb, "heb"),
+    (Hi, "hi") => (Hin, "hin"),
+    (Hr, "hr") => (Hrv, "hrv"),
+    (Hu, "hu") => (Hun, "hun"),
+    (Hy, "hy") => (Hye, "hye"),
+    (Id, "id") => (Ind, "ind"),
+    (It, "it") => (Ita, "ita"),
+    (Jv, "jv") => (Jav, "jav"),
+    (Ja, "ja") => (Jpn, "jpn"),
+    (Kn, "kn") => (Kan, "kan"),
+    (Ka, "ka") => (Kat, "kat"),
+    (Km, "km") => (Khm, "khm"),
+    (Ko, "ko") => (Kor, "kor"),
+    (La, "la") => (Lat, "lat"),
+    (Lv, "lv") => (Lav, "lav"),
+    (Lt, "lt") => (Lit, "lit"),
+    (Ml, "ml") => (Mal, "mal"),
+    (Mr, "mr") => (Mar, "mar"),
+    (Mk, "mk") => (Mkd, "mkd"),
+    (My, "my") => (Mya, "mya"),
+    (Ne, "ne") => (Nep, "nep"),
+    (Nl, "nl") => (Nld, "nld"),
+    (Nb, "nb") => (Nob, "nob"),
+    (Or, "or") => (Ori, "ori"),
+    (Pa, "pa") => (Pan, "pan"),
+    (Fa, "fa") => (Pes, "pes"),
+    (Pl, "pl") => (Pol, "pol"),
+    (Pt, "pt") => (Por, "por"),
+    (Ro, "ro") => (Ron, "ron"),
+    (Ru, "ru") => (Rus, "rus"),
+    (Si, "si") => (Sin, "sin"),
+    (Sk, "sk") => (Slk, "slk"),
+    (Sl, "sl") => (Slv, "slv"),
+    (Sn, "sn") => (Sna, "sna"),
+    (Es, "es") => (Spa, "spa"),
+    (Sr, "sr") => (Srp, "srp"),
+    (Sv, "sv") => (Swe, "swe"),
+    (Ta, "ta") => (Tam, "tam"),
+    (Te, "te") => (Tel, "tel"),
+    (Tl, "tl") => (Tgl, "tgl"),
+    (Th, "th") => (Tha, "tha"),
+    (Tk, "tk") => (Tuk, "tuk"),
+    (Tr, "tr") => (Tur, "tur"),
+    (Uk, "uk") => (Ukr, "ukr"),
+    (Ur, "ur") => (Urd, "urd"),
+    (Uz, "uz") => (Uzb, "uzb"),
+    (Vi, "vi") => (Vie, "vie"),
+    (Yi, "yi") => (Yid, "yid"),
+    (Zh, "zh") => (Zho, "zho"),
+    (Zu, "zu") => (Zul, "zul"),
+);
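What the regenerated macro buys, sketched as usage: both the two-letter ISO 639-1 code and the three-letter ISO 639-3 code now parse to a `Locale`, and both convert to the same `milli::tokenizer::Language` variant. This sketch assumes the macro-generated `Locale`, conversions, and `LocaleFormatError` above are in scope; it is not a standalone program:

    use std::str::FromStr;

    fn demo() {
        // "en" hits the $iso_639_1_str arm, "eng" the $iso_639_3_str arm.
        let short = Locale::from_str("en").unwrap(); // Locale::En
        let long = Locale::from_str("eng").unwrap(); // Locale::Eng
        // Both variants map to milli::tokenizer::Language::Eng.
        let _lang_a = milli::tokenizer::Language::from(short);
        let _lang_b = milli::tokenizer::Language::from(long);
        // Unknown codes produce the sorted "expected one of ..." message.
        assert!(Locale::from_str("xx").is_err());
    }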
@@ -10,38 +10,52 @@ static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");
 static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");
 
 /// Persists the version of the current Meilisearch binary to a VERSION file
-pub fn create_version_file(db_path: &Path) -> io::Result<()> {
+pub fn create_current_version_file(db_path: &Path) -> io::Result<()> {
+    create_version_file(db_path, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
+}
+
+pub fn create_version_file(
+    db_path: &Path,
+    major: &str,
+    minor: &str,
+    patch: &str,
+) -> io::Result<()> {
     let version_path = db_path.join(VERSION_FILE_NAME);
-    fs::write(version_path, format!("{}.{}.{}", VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH))
+    fs::write(version_path, format!("{}.{}.{}", major, minor, patch))
 }
 
 /// Ensures Meilisearch version is compatible with the database, returns an error versions mismatch.
 pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> {
-    let version_path = db_path.join(VERSION_FILE_NAME);
+    let (major, minor, patch) = get_version(db_path)?;
 
-    match fs::read_to_string(version_path) {
-        Ok(version) => {
-            let version_components = version.split('.').collect::<Vec<_>>();
-            let (major, minor, patch) = match &version_components[..] {
-                [major, minor, patch] => (major.to_string(), minor.to_string(), patch.to_string()),
-                _ => return Err(VersionFileError::MalformedVersionFile.into()),
-            };
-
-            if major != VERSION_MAJOR || minor != VERSION_MINOR {
-                return Err(VersionFileError::VersionMismatch { major, minor, patch }.into());
-            }
-        }
-        Err(error) => {
-            return match error.kind() {
-                ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile.into()),
-                _ => Err(error.into()),
-            }
-        }
+    if major != VERSION_MAJOR || minor != VERSION_MINOR {
+        return Err(VersionFileError::VersionMismatch { major, minor, patch }.into());
     }
 
     Ok(())
 }
+
+pub fn get_version(db_path: &Path) -> Result<(String, String, String), VersionFileError> {
+    let version_path = db_path.join(VERSION_FILE_NAME);
+
+    match fs::read_to_string(version_path) {
+        Ok(version) => parse_version(&version),
+        Err(error) => match error.kind() {
+            ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile),
+            _ => Err(error.into()),
+        },
+    }
+}
+
+pub fn parse_version(version: &str) -> Result<(String, String, String), VersionFileError> {
+    let version_components = version.split('.').collect::<Vec<_>>();
+    let (major, minor, patch) = match &version_components[..] {
+        [major, minor, patch] => (major.to_string(), minor.to_string(), patch.to_string()),
+        _ => return Err(VersionFileError::MalformedVersionFile),
+    };
+    Ok((major, minor, patch))
+}
 
 #[derive(thiserror::Error, Debug)]
 pub enum VersionFileError {
     #[error(
@@ -58,4 +72,7 @@ pub enum VersionFileError {
         env!("CARGO_PKG_VERSION").to_string()
     )]
     VersionMismatch { major: String, minor: String, patch: String },
+
+    #[error(transparent)]
+    IoError(#[from] std::io::Error),
 }
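A small usage sketch of the split-out helpers (the `demo` wrapper is ours; `parse_version` and `VersionFileError` are exactly the items added above):

    fn demo() -> Result<(), VersionFileError> {
        // parse_version accepts exactly `major.minor.patch`...
        let (major, minor, patch) = parse_version("1.11.0")?;
        assert_eq!((major.as_str(), minor.as_str(), patch.as_str()), ("1", "11", "0"));

        // ...and anything else is a MalformedVersionFile error.
        assert!(parse_version("1.11").is_err());
        Ok(())
    }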
@@ -153,6 +153,7 @@ greek = ["meilisearch-types/greek"]
 khmer = ["meilisearch-types/khmer"]
 vietnamese = ["meilisearch-types/vietnamese"]
 swedish-recomposition = ["meilisearch-types/swedish-recomposition"]
+german = ["meilisearch-types/german"]
 
 [package.metadata.mini-dashboard]
 assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip"
@@ -646,8 +646,6 @@ pub struct SearchAggregator {
     max_vector_size: usize,
     // Whether the semantic ratio passed to a hybrid search equals the default ratio.
     semantic_ratio: bool,
-    // Whether a non-default embedder was specified
-    embedder: bool,
     hybrid: bool,
     retrieve_vectors: bool,
 
@@ -795,7 +793,6 @@ impl SearchAggregator {
 
         if let Some(hybrid) = hybrid {
             ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
-            ret.embedder = hybrid.embedder.is_some();
             ret.hybrid = true;
         }
 
@@ -863,7 +860,6 @@ impl SearchAggregator {
             show_ranking_score,
             show_ranking_score_details,
             semantic_ratio,
-            embedder,
             hybrid,
             total_degraded,
             total_used_negative_operator,
@@ -923,7 +919,6 @@ impl SearchAggregator {
         self.retrieve_vectors |= retrieve_vectors;
         self.semantic_ratio |= semantic_ratio;
         self.hybrid |= hybrid;
-        self.embedder |= embedder;
 
         // pagination
         self.max_limit = self.max_limit.max(max_limit);
@@ -999,7 +994,6 @@ impl SearchAggregator {
             show_ranking_score,
             show_ranking_score_details,
             semantic_ratio,
-            embedder,
             hybrid,
             total_degraded,
             total_used_negative_operator,
@@ -1051,7 +1045,6 @@ impl SearchAggregator {
                 "hybrid": {
                     "enabled": hybrid,
                     "semantic_ratio": semantic_ratio,
-                    "embedder": embedder,
                 },
                 "pagination": {
                     "max_limit": max_limit,
@@ -1782,7 +1775,6 @@ pub struct SimilarAggregator {
     used_syntax: HashMap<String, usize>,
 
     // Whether a non-default embedder was specified
-    embedder: bool,
     retrieve_vectors: bool,
 
     // pagination
@@ -1803,7 +1795,7 @@ impl SimilarAggregator {
     pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self {
         let SimilarQuery {
             id: _,
-            embedder,
+            embedder: _,
             offset,
             limit,
             attributes_to_retrieve: _,
@@ -1851,7 +1843,6 @@ impl SimilarAggregator {
         ret.show_ranking_score_details = *show_ranking_score_details;
         ret.ranking_score_threshold = ranking_score_threshold.is_some();
 
-        ret.embedder = embedder.is_some();
        ret.retrieve_vectors = *retrieve_vectors;
 
         ret
@@ -1883,7 +1874,6 @@ impl SimilarAggregator {
             max_attributes_to_retrieve,
             show_ranking_score,
             show_ranking_score_details,
-            embedder,
             ranking_score_threshold,
             retrieve_vectors,
         } = other;
@@ -1914,7 +1904,6 @@ impl SimilarAggregator {
             *used_syntax = used_syntax.saturating_add(value);
         }
 
-        self.embedder |= embedder;
         self.retrieve_vectors |= retrieve_vectors;
 
         // pagination
@@ -1948,7 +1937,6 @@ impl SimilarAggregator {
             max_attributes_to_retrieve,
             show_ranking_score,
             show_ranking_score_details,
-            embedder,
             ranking_score_threshold,
             retrieve_vectors,
         } = self;
@@ -1980,9 +1968,6 @@ impl SimilarAggregator {
                 "vector": {
                     "retrieve_vectors": retrieve_vectors,
                 },
-                "hybrid": {
-                    "embedder": embedder,
-                },
                 "pagination": {
                     "max_limit": max_limit,
                     "max_offset": max_offset,
@@ -4,6 +4,7 @@ use byte_unit::{Byte, UnitType};
 use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
 use meilisearch_types::error::{Code, ErrorCode, ResponseError};
 use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
+use meilisearch_types::milli::OrderBy;
 use serde_json::Value;
 use tokio::task::JoinError;
 
@@ -27,10 +28,20 @@ pub enum MeilisearchHttpError {
     EmptyFilter,
     #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
     InvalidExpression(&'static [&'static str], Value),
-    #[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")]
+    #[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")]
     FederationOptionsInNonFederatedRequest(usize),
-    #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")]
+    #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `{1}` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search")]
     PaginationInFederatedQuery(usize, &'static str),
+    #[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")]
+    FacetsInFederatedQuery(usize, String, Vec<String>),
+    #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.")]
+    InconsistentFacetOrder {
+        facet: String,
+        previous_facet_order: OrderBy,
+        previous_uid: String,
+        index_facet_order: OrderBy,
+        current_uid: String,
+    },
     #[error("A {0} payload is missing.")]
     MissingPayload(PayloadType),
     #[error("Too many search requests running at the same time: {0}. Retry after 10s.")]
@@ -61,7 +72,7 @@ pub enum MeilisearchHttpError {
     DocumentFormat(#[from] DocumentFormatError),
     #[error(transparent)]
     Join(#[from] JoinError),
-    #[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")]
+    #[error("Invalid request: missing `hybrid` parameter when `vector` is present.")]
     MissingSearchHybrid,
 }
 
@@ -96,6 +107,10 @@ impl ErrorCode for MeilisearchHttpError {
             MeilisearchHttpError::PaginationInFederatedQuery(_, _) => {
                 Code::InvalidMultiSearchQueryPagination
             }
+            MeilisearchHttpError::FacetsInFederatedQuery(..) => Code::InvalidMultiSearchQueryFacets,
+            MeilisearchHttpError::InconsistentFacetOrder { .. } => {
+                Code::InvalidMultiSearchFacetOrder
+            }
         }
     }
 }
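For illustration, here is how the new `FacetsInFederatedQuery` variant renders, going by the `#[error(...)]` format string above; the concrete values are made up:

    let err = MeilisearchHttpError::FacetsInFederatedQuery(
        1,
        "movies".to_string(),
        vec!["genre".to_string()],
    );
    // err.to_string() yields:
    // Inside `.queries[1]`: Using facet options is not allowed in federated queries.
    //  - Hint: remove `facets` from query #1 or remove `federation` from the request
    //  - Hint: pass `federation.facetsByIndex.movies: ["genre"]` for facets in federated search
    // and error_code() maps it to Code::InvalidMultiSearchQueryFacets.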
@ -13,11 +13,10 @@ pub mod search_queue;
|
|||||||
|
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{BufReader, BufWriter};
|
use std::io::{BufReader, BufWriter};
|
||||||
use std::num::NonZeroUsize;
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::thread::{self, available_parallelism};
|
use std::thread;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use actix_cors::Cors;
|
use actix_cors::Cors;
|
||||||
@ -37,7 +36,7 @@ use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchR
|
|||||||
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
|
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
|
||||||
use meilisearch_types::settings::apply_settings_to_builder;
|
use meilisearch_types::settings::apply_settings_to_builder;
|
||||||
use meilisearch_types::tasks::KindWithContent;
|
use meilisearch_types::tasks::KindWithContent;
|
||||||
use meilisearch_types::versioning::{check_version_file, create_version_file};
|
use meilisearch_types::versioning::{check_version_file, create_current_version_file};
|
||||||
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
|
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
|
||||||
pub use option::Opt;
|
pub use option::Opt;
|
||||||
use option::ScheduleSnapshot;
|
use option::ScheduleSnapshot;
|
||||||
@ -118,6 +117,7 @@ pub type LogStderrType = tracing_subscriber::filter::Filtered<
|
|||||||
pub fn create_app(
|
pub fn create_app(
|
||||||
index_scheduler: Data<IndexScheduler>,
|
index_scheduler: Data<IndexScheduler>,
|
||||||
auth_controller: Data<AuthController>,
|
auth_controller: Data<AuthController>,
|
||||||
|
search_queue: Data<SearchQueue>,
|
||||||
opt: Opt,
|
opt: Opt,
|
||||||
logs: (LogRouteHandle, LogStderrHandle),
|
logs: (LogRouteHandle, LogStderrHandle),
|
||||||
analytics: Arc<dyn Analytics>,
|
analytics: Arc<dyn Analytics>,
|
||||||
@ -137,6 +137,7 @@ pub fn create_app(
|
|||||||
s,
|
s,
|
||||||
index_scheduler.clone(),
|
index_scheduler.clone(),
|
||||||
auth_controller.clone(),
|
auth_controller.clone(),
|
||||||
|
search_queue.clone(),
|
||||||
&opt,
|
&opt,
|
||||||
logs,
|
logs,
|
||||||
analytics.clone(),
|
analytics.clone(),
|
||||||
@ -318,7 +319,7 @@ fn open_or_create_database_unchecked(
|
|||||||
match (
|
match (
|
||||||
index_scheduler_builder(),
|
index_scheduler_builder(),
|
||||||
auth_controller.map_err(anyhow::Error::from),
|
auth_controller.map_err(anyhow::Error::from),
|
||||||
create_version_file(&opt.db_path).map_err(anyhow::Error::from),
|
create_current_version_file(&opt.db_path).map_err(anyhow::Error::from),
|
||||||
) {
|
) {
|
||||||
(Ok(i), Ok(a), Ok(())) => Ok((i, a)),
|
(Ok(i), Ok(a), Ok(())) => Ok((i, a)),
|
||||||
(Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => {
|
(Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => {
|
||||||
@@ -469,19 +470,16 @@ pub fn configure_data(
     config: &mut web::ServiceConfig,
     index_scheduler: Data<IndexScheduler>,
     auth: Data<AuthController>,
+    search_queue: Data<SearchQueue>,
     opt: &Opt,
     (logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
     analytics: Arc<dyn Analytics>,
 ) {
-    let search_queue = SearchQueue::new(
-        opt.experimental_search_queue_size,
-        available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()),
-    );
     let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
     config
         .app_data(index_scheduler)
         .app_data(auth)
-        .app_data(web::Data::new(search_queue))
+        .app_data(search_queue)
         .app_data(web::Data::from(analytics))
         .app_data(web::Data::new(logs_route))
         .app_data(web::Data::new(logs_stderr))
@@ -1,8 +1,10 @@
 use std::env;
 use std::io::{stderr, LineWriter, Write};
+use std::num::NonZeroUsize;
 use std::path::PathBuf;
 use std::str::FromStr;
 use std::sync::Arc;
+use std::thread::available_parallelism;

 use actix_web::http::KeepAlive;
 use actix_web::web::Data;
@@ -11,6 +13,7 @@ use index_scheduler::IndexScheduler;
 use is_terminal::IsTerminal;
 use meilisearch::analytics::Analytics;
 use meilisearch::option::LogMode;
+use meilisearch::search_queue::SearchQueue;
 use meilisearch::{
     analytics, create_app, setup_meilisearch, LogRouteHandle, LogRouteType, LogStderrHandle,
     LogStderrType, Opt, SubscriberForSecondLayer,
@@ -148,11 +151,17 @@ async fn run_http(
     let opt_clone = opt.clone();
     let index_scheduler = Data::from(index_scheduler);
     let auth_controller = Data::from(auth_controller);
+    let search_queue = SearchQueue::new(
+        opt.experimental_search_queue_size,
+        available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()),
+    );
+    let search_queue = Data::new(search_queue);

     let http_server = HttpServer::new(move || {
         create_app(
             index_scheduler.clone(),
             auth_controller.clone(),
+            search_queue.clone(),
             opt.clone(),
             logs.clone(),
             analytics.clone(),
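For context on this move: `configure_data` runs inside the `HttpServer::new` factory closure (via `create_app`), once per actix worker, so building the queue there, as the old code did, created one queue per worker and multiplied the intended capacity. Constructing it once in `run_http` and handing it down as `Data<SearchQueue>` gives every worker the same queue. A minimal, self-contained sketch of that sharing pattern, assuming only plain actix-web (the shared value is a stand-in, not Meilisearch code):

```rust
use actix_web::{web::Data, App, HttpServer};

#[actix_web::main]
async fn main() -> std::io::Result<()> {
    // Built once, before the factory closure: every worker shares this value.
    let shared = Data::new(42u32);
    HttpServer::new(move || {
        // This closure runs once per worker; anything constructed *here*
        // would exist once per worker instead of once per process.
        App::new().app_data(shared.clone())
    })
    .bind(("127.0.0.1", 8080))?
    .run()
    .await
}
```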
@@ -81,7 +81,7 @@ pub async fn search(
     let index = index_scheduler.index(&index_uid)?;
     let features = index_scheduler.features();
     let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?;
-    let _permit = search_queue.try_get_search_permit().await?;
+    let permit = search_queue.try_get_search_permit().await?;
     let search_result = tokio::task::spawn_blocking(move || {
         perform_facet_search(
             &index,
@@ -93,7 +93,9 @@ pub async fn search(
             locales,
         )
     })
-    .await?;
+    .await;
+    permit.drop().await;
+    let search_result = search_result?;

     if let Ok(ref search_result) = search_result {
         aggregate.succeed(search_result);
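Note the shape of this change, repeated in the routes below: the join handle is awaited *without* `?`, the permit is explicitly given back, and only then is the error propagated. With the old `.await?`, a failed join would return early and leave the release to the synchronous `Drop` fallback. A self-contained sketch of the pattern, assuming only the tokio crate (`Permit` here is a stand-in for the real type):

```rust
use tokio::sync::mpsc;

// Stand-in permit: gives its slot back over a channel, like the real one.
struct Permit {
    sender: mpsc::Sender<()>,
}

impl Permit {
    async fn drop(self) {
        // If the channel is closed the whole instance is down; ignore the error.
        let _ = self.sender.send(()).await;
    }
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let (tx, mut rx) = mpsc::channel(1);
    let permit = Permit { sender: tx };

    // `.await` without `?`: keep the join result, release the permit first,
    // and only then propagate a potential join error.
    let result = tokio::task::spawn_blocking(|| 2 + 2).await;
    permit.drop().await;
    let result = result?;

    assert_eq!(result, 4);
    assert!(rx.recv().await.is_some()); // the slot was given back
    Ok(())
}
```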
@@ -128,8 +128,10 @@ impl std::ops::Deref for SemanticRatioGet {
     }
 }

-impl From<SearchQueryGet> for SearchQuery {
-    fn from(other: SearchQueryGet) -> Self {
+impl TryFrom<SearchQueryGet> for SearchQuery {
+    type Error = ResponseError;
+
+    fn try_from(other: SearchQueryGet) -> Result<Self, Self::Error> {
         let filter = match other.filter {
             Some(f) => match serde_json::from_str(&f) {
                 Ok(v) => Some(v),
@@ -140,19 +142,28 @@ impl From<SearchQueryGet> for SearchQuery {

         let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) {
             (None, None) => None,
-            (None, Some(semantic_ratio)) => {
-                Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None })
+            (None, Some(_)) => {
+                return Err(ResponseError::from_msg(
+                    "`hybridEmbedder` is mandatory when `hybridSemanticRatio` is present".into(),
+                    meilisearch_types::error::Code::InvalidHybridQuery,
+                ));
+            }
+            (Some(embedder), None) => {
+                Some(HybridQuery { semantic_ratio: DEFAULT_SEMANTIC_RATIO(), embedder })
             }
-            (Some(embedder), None) => Some(HybridQuery {
-                semantic_ratio: DEFAULT_SEMANTIC_RATIO(),
-                embedder: Some(embedder),
-            }),
             (Some(embedder), Some(semantic_ratio)) => {
-                Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: Some(embedder) })
+                Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder })
             }
         };

-        Self {
+        if other.vector.is_some() && hybrid.is_none() {
+            return Err(ResponseError::from_msg(
+                "`hybridEmbedder` is mandatory when `vector` is present".into(),
+                meilisearch_types::error::Code::MissingSearchHybrid,
+            ));
+        }
+
+        Ok(Self {
             q: other.q,
             vector: other.vector.map(CS::into_inner),
             offset: other.offset.0,
@@ -179,7 +190,7 @@ impl From<SearchQueryGet> for SearchQuery {
             hybrid,
             ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
             locales: other.locales.map(|o| o.into_iter().collect()),
-        }
+        })
     }
 }

@@ -219,7 +230,7 @@ pub async fn search_with_url_query(
     debug!(parameters = ?params, "Search get");
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;

-    let mut query: SearchQuery = params.into_inner().into();
+    let mut query: SearchQuery = params.into_inner().try_into()?;

     // Tenant token search_rules.
     if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
@@ -233,11 +244,13 @@ pub async fn search_with_url_query(

     let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
     let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
-    let _permit = search_queue.try_get_search_permit().await?;
+    let permit = search_queue.try_get_search_permit().await?;
     let search_result = tokio::task::spawn_blocking(move || {
         perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features())
     })
-    .await?;
+    .await;
+    permit.drop().await;
+    let search_result = search_result?;
     if let Ok(ref search_result) = search_result {
         aggregate.succeed(search_result);
     }
@@ -276,11 +289,13 @@ pub async fn search_with_post(
     let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
     let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;

-    let _permit = search_queue.try_get_search_permit().await?;
+    let permit = search_queue.try_get_search_permit().await?;
     let search_result = tokio::task::spawn_blocking(move || {
         perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features())
     })
-    .await?;
+    .await;
+    permit.drop().await;
+    let search_result = search_result?;
     if let Ok(ref search_result) = search_result {
         aggregate.succeed(search_result);
         if search_result.degraded {
@@ -308,44 +323,36 @@ pub fn search_kind(
         features.check_vector("Passing `hybrid` as a parameter")?;
     }

-    // regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing
-    if query.vector.is_none() {
-        match &query.q {
-            Some(q) if q.trim().is_empty() => return Ok(SearchKind::KeywordOnly),
-            None => return Ok(SearchKind::KeywordOnly),
-            _ => {}
+    // handle with care, the order of cases matters, the semantics is subtle
+    match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) {
+        // empty query, no vector => placeholder search
+        (Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly),
+        // no query, no vector => placeholder search
+        (None, _, None) => Ok(SearchKind::KeywordOnly),
+        // hybrid.semantic_ratio == 1.0 => vector
+        (_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
+            SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len()))
         }
-    }
-
-    match &query.hybrid {
-        Some(HybridQuery { semantic_ratio, embedder }) if **semantic_ratio == 1.0 => {
-            Ok(SearchKind::semantic(
-                index_scheduler,
-                index,
-                embedder.as_deref(),
-                query.vector.as_ref().map(Vec::len),
-            )?)
-        }
-        Some(HybridQuery { semantic_ratio, embedder: _ }) if **semantic_ratio == 0.0 => {
+        // hybrid.semantic_ratio == 0.0 => keyword
+        (_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
             Ok(SearchKind::KeywordOnly)
         }
-        Some(HybridQuery { semantic_ratio, embedder }) => Ok(SearchKind::hybrid(
+        // no query, hybrid, vector => semantic
+        (None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
+            SearchKind::semantic(index_scheduler, index, embedder, Some(v.len()))
+        }
+        // query, no hybrid, no vector => keyword
+        (Some(_), None, None) => Ok(SearchKind::KeywordOnly),
+        // query, hybrid, maybe vector => hybrid
+        (Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
             index_scheduler,
             index,
-            embedder.as_deref(),
+            embedder,
             **semantic_ratio,
-            query.vector.as_ref().map(Vec::len),
-        )?),
-        None => match (query.q.as_deref(), query.vector.as_deref()) {
-            (_query, None) => Ok(SearchKind::KeywordOnly),
-            (None, Some(_vector)) => Ok(SearchKind::semantic(
-                index_scheduler,
-                index,
-                None,
-                query.vector.as_ref().map(Vec::len),
-            )?),
-            (Some(_), Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
-        },
+            v.map(|v| v.len()),
+        ),
+        (_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
     }
 }
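As the new comment warns, these arms overlap and their order carries the semantics: the placeholder arms must win for empty or missing queries before the `semantic_ratio` guards are consulted, and the `MissingSearchHybrid` arm only fires once every legitimate combination has been peeled off. A self-contained toy model of that precedence (hypothetical simplified signature, strings standing in for `SearchKind`):

```rust
// Toy model only: an `f32` ratio and string labels replace the real types.
fn kind(q: Option<&str>, ratio: Option<f32>, vector: Option<&[f32]>) -> &'static str {
    match (q, ratio, vector) {
        (Some(q), _, None) if q.trim().is_empty() => "placeholder",
        (None, _, None) => "placeholder",
        (_, Some(r), _) if r == 1.0 => "semantic",
        (_, Some(r), _) if r == 0.0 => "keyword",
        (None, Some(_), Some(_)) => "semantic",
        (Some(_), None, None) => "keyword",
        (Some(_), Some(_), _) => "hybrid",
        (_, None, Some(_)) => "error: missing hybrid",
    }
}

fn main() {
    // The whitespace-only query hits the first arm even though a ratio is set.
    assert_eq!(kind(Some("  "), Some(0.5), None), "placeholder");
    assert_eq!(kind(Some("dune"), Some(0.5), None), "hybrid");
    assert_eq!(kind(None, None, Some(&[0.1, 0.2])), "error: missing hybrid");
}
```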
@@ -636,11 +636,26 @@ fn embedder_analytics(
             .any(|config| config.document_template.set().is_some())
     });

+    let document_template_max_bytes = setting.as_ref().and_then(|map| {
+        map.values()
+            .filter_map(|config| config.clone().set())
+            .filter_map(|config| config.document_template_max_bytes.set())
+            .max()
+    });
+
+    let binary_quantization_used = setting.as_ref().map(|map| {
+        map.values()
+            .filter_map(|config| config.clone().set())
+            .any(|config| config.binary_quantized.set().is_some())
+    });
+
     json!(
         {
             "total": setting.as_ref().map(|s| s.len()),
             "sources": sources,
             "document_template_used": document_template_used,
+            "document_template_max_bytes": document_template_max_bytes,
+            "binary_quantization_used": binary_quantization_used,
         }
     )
 }
@@ -102,8 +102,8 @@ async fn similar(

     let index = index_scheduler.index(&index_uid)?;

-    let (embedder_name, embedder) =
-        SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?;
+    let (embedder_name, embedder, quantized) =
+        SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?;

     tokio::task::spawn_blocking(move || {
         perform_similar(
@@ -111,6 +111,7 @@ async fn similar(
             query,
             embedder_name,
             embedder,
+            quantized,
             retrieve_vectors,
             index_scheduler.features(),
         )
@@ -139,8 +140,8 @@ pub struct SimilarQueryGet {
     show_ranking_score_details: Param<bool>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
     pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
-    #[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
-    pub embedder: Option<String>,
+    #[deserr(error = DeserrQueryParamError<InvalidEmbedder>)]
+    pub embedder: String,
 }

 #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
@@ -39,7 +39,7 @@ pub async fn multi_search_with_post(
 ) -> Result<HttpResponse, ResponseError> {
     // Since we don't want to process half of the search requests and then get a permit refused
     // we're going to get one permit for the whole duration of the multi-search request.
-    let _permit = search_queue.try_get_search_permit().await?;
+    let permit = search_queue.try_get_search_permit().await?;

     let federated_search = params.into_inner();

@@ -81,6 +81,7 @@ pub async fn multi_search_with_post(
             perform_federated_search(&index_scheduler, queries, federation, features)
         })
         .await;
+        permit.drop().await;

         if let Ok(Ok(_)) = search_result {
             multi_aggregate.succeed();
@@ -143,6 +144,7 @@ pub async fn multi_search_with_post(
             Ok(search_results)
         }
         .await;
+        permit.drop().await;

         if search_results.is_ok() {
             multi_aggregate.succeed();
@@ -9,20 +9,24 @@ use std::vec::{IntoIter, Vec};

 use actix_http::StatusCode;
 use index_scheduler::{IndexScheduler, RoFeatures};
+use indexmap::IndexMap;
 use meilisearch_types::deserr::DeserrJsonError;
 use meilisearch_types::error::deserr_codes::{
-    InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset,
+    InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet,
+    InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit,
+    InvalidSearchOffset,
 };
 use meilisearch_types::error::ResponseError;
+use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue};
-use meilisearch_types::milli::{self, DocumentId, TimeBudget};
+use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget};
 use roaring::RoaringBitmap;
 use serde::Serialize;

 use super::ranking_rules::{self, RankingRules};
 use super::{
-    prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind,
-    SearchQuery, SearchQueryWithIndex,
+    compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats,
+    HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex,
 };
 use crate::error::MeilisearchHttpError;
 use crate::routes::indexes::search::search_kind;
@@ -73,6 +77,17 @@ pub struct Federation {
     pub limit: usize,
     #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
     pub offset: usize,
+    #[deserr(default, error = DeserrJsonError<InvalidMultiSearchFacetsByIndex>)]
+    pub facets_by_index: BTreeMap<IndexUid, Option<Vec<String>>>,
+    #[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeFacets>)]
+    pub merge_facets: Option<MergeFacets>,
+}
+
+#[derive(Copy, Clone, Debug, deserr::Deserr, Default)]
+#[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)]
+pub struct MergeFacets {
+    #[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)]
+    pub max_values_per_facet: Option<usize>,
 }

 #[derive(Debug, deserr::Deserr)]
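For reference, the two new fields let a federated request ask for facets per index and optionally merge them across indexes. A sketch of what such a request body might look like, inferred from the deserr attributes and the error messages elsewhere in this diff (`facetsByIndex`, `mergeFacets`); the field names and values here are illustrative, not copied from official docs:

```rust
use serde_json::json;

fn main() {
    // Hypothetical federated search payload exercising the new fields.
    let body = json!({
        "federation": {
            "limit": 20,
            "facetsByIndex": {
                "movies": ["genre"],   // facet these fields for `movies`
                "books": null          // no facets for `books`
            },
            "mergeFacets": { "maxValuesPerFacet": 10 }
        },
        "queries": [
            { "indexUid": "movies", "q": "dune" },
            { "indexUid": "books", "q": "dune" }
        ]
    });
    println!("{body}");
}
```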
@@ -82,7 +97,7 @@ pub struct FederatedSearch {
     #[deserr(default)]
     pub federation: Option<Federation>,
 }
-#[derive(Serialize, Clone, PartialEq)]
+#[derive(Serialize, Clone)]
 #[serde(rename_all = "camelCase")]
 pub struct FederatedSearchResult {
     pub hits: Vec<SearchHit>,
@@ -93,6 +108,13 @@ pub struct FederatedSearchResult {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub semantic_hit_count: Option<u32>,

+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub facet_stats: Option<BTreeMap<String, FacetStats>>,
+    #[serde(skip_serializing_if = "FederatedFacets::is_empty")]
+    pub facets_by_index: FederatedFacets,
+
     // These fields are only used for analytics purposes
     #[serde(skip)]
     pub degraded: bool,
@@ -109,6 +131,9 @@ impl fmt::Debug for FederatedSearchResult {
             semantic_hit_count,
             degraded,
             used_negative_operator,
+            facet_distribution,
+            facet_stats,
+            facets_by_index,
         } = self;

         let mut debug = f.debug_struct("SearchResult");
@@ -122,9 +147,18 @@ impl fmt::Debug for FederatedSearchResult {
         if *degraded {
             debug.field("degraded", degraded);
         }
+        if let Some(facet_distribution) = facet_distribution {
+            debug.field("facet_distribution", &facet_distribution);
+        }
+        if let Some(facet_stats) = facet_stats {
+            debug.field("facet_stats", &facet_stats);
+        }
         if let Some(semantic_hit_count) = semantic_hit_count {
             debug.field("semantic_hit_count", &semantic_hit_count);
         }
+        if !facets_by_index.is_empty() {
+            debug.field("facets_by_index", &facets_by_index);
+        }

         debug.finish()
     }
@@ -313,16 +347,104 @@ struct SearchHitByIndex {
 }

 struct SearchResultByIndex {
+    index: String,
     hits: Vec<SearchHitByIndex>,
-    candidates: RoaringBitmap,
+    estimated_total_hits: usize,
     degraded: bool,
     used_negative_operator: bool,
+    facets: Option<ComputedFacets>,
+}
+
+#[derive(Debug, Clone, Default, Serialize)]
+pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>);
+
+impl FederatedFacets {
+    pub fn insert(&mut self, index: String, facets: Option<ComputedFacets>) {
+        if let Some(facets) = facets {
+            self.0.insert(index, facets);
+        }
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+
+    pub fn merge(
+        self,
+        MergeFacets { max_values_per_facet }: MergeFacets,
+        facet_order: BTreeMap<String, (String, OrderBy)>,
+    ) -> Option<ComputedFacets> {
+        if self.is_empty() {
+            return None;
+        }
+
+        let mut distribution: BTreeMap<String, _> = Default::default();
+        let mut stats: BTreeMap<String, FacetStats> = Default::default();
+
+        for facets_by_index in self.0.into_values() {
+            for (facet, index_distribution) in facets_by_index.distribution {
+                match distribution.entry(facet) {
+                    std::collections::btree_map::Entry::Vacant(entry) => {
+                        entry.insert(index_distribution);
+                    }
+                    std::collections::btree_map::Entry::Occupied(mut entry) => {
+                        let distribution = entry.get_mut();
+
+                        for (value, index_count) in index_distribution {
+                            distribution
+                                .entry(value)
+                                .and_modify(|count| *count += index_count)
+                                .or_insert(index_count);
+                        }
+                    }
+                }
+            }
+
+            for (facet, index_stats) in facets_by_index.stats {
+                match stats.entry(facet) {
+                    std::collections::btree_map::Entry::Vacant(entry) => {
+                        entry.insert(index_stats);
+                    }
+                    std::collections::btree_map::Entry::Occupied(mut entry) => {
+                        let stats = entry.get_mut();
+
+                        stats.min = f64::min(stats.min, index_stats.min);
+                        stats.max = f64::max(stats.max, index_stats.max);
+                    }
+                }
+            }
+        }
+
+        // fixup order
+        for (facet, values) in &mut distribution {
+            let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default();
+
+            match order_by {
+                OrderBy::Lexicographic => {
+                    values.sort_unstable_by(|left, _, right, _| left.cmp(right))
+                }
+                OrderBy::Count => {
+                    values.sort_unstable_by(|_, left, _, right| {
+                        left.cmp(right)
+                            // biggest first
+                            .reverse()
+                    })
+                }
+            }
+
+            if let Some(max_values_per_facet) = max_values_per_facet {
+                values.truncate(max_values_per_facet)
+            };
+        }
+
+        Some(ComputedFacets { distribution, stats })
+    }
 }

 pub fn perform_federated_search(
     index_scheduler: &IndexScheduler,
     queries: Vec<SearchQueryWithIndex>,
-    federation: Federation,
+    mut federation: Federation,
     features: RoFeatures,
 ) -> Result<FederatedSearchResult, ResponseError> {
     let before_search = std::time::Instant::now();
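`FederatedFacets::merge` folds the per-index facet results into a single map: counts for the same facet value are summed across indexes, stats take the min/max envelope, and the merged distribution is then re-sorted and truncated per `MergeFacets`. The summing step is a plain nested map merge; a std-only sketch of it (simplified types, ordering and truncation omitted):

```rust
use std::collections::BTreeMap;

// facet name -> (facet value -> count); a simplified stand-in for the real types.
type Distribution = BTreeMap<String, BTreeMap<String, u64>>;

fn merge(mut acc: Distribution, other: Distribution) -> Distribution {
    for (facet, values) in other {
        let entry = acc.entry(facet).or_default();
        for (value, count) in values {
            // Same facet value seen in two indexes: add the counts together.
            *entry.entry(value).or_insert(0) += count;
        }
    }
    acc
}

fn main() {
    let a: Distribution =
        BTreeMap::from([("genre".into(), BTreeMap::from([("sci-fi".into(), 3)]))]);
    let b: Distribution = BTreeMap::from([(
        "genre".into(),
        BTreeMap::from([("sci-fi".into(), 2), ("drama".into(), 1)]),
    )]);
    let merged = merge(a, b);
    assert_eq!(merged["genre"]["sci-fi"], 5);
    assert_eq!(merged["genre"]["drama"], 1);
}
```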
@@ -342,6 +464,16 @@ pub fn perform_federated_search(
             .into());
         }

+        if let Some(facets) = federated_query.has_facets() {
+            let facets = facets.to_owned();
+            return Err(MeilisearchHttpError::FacetsInFederatedQuery(
+                query_index,
+                federated_query.index_uid.into_inner(),
+                facets,
+            )
+            .into());
+        }
+
         let (index_uid, query, federation_options) = federated_query.into_index_query_federation();

         queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex {
|
|||||||
|
|
||||||
// 2. perform queries, merge and make hits index by index
|
// 2. perform queries, merge and make hits index by index
|
||||||
let required_hit_count = federation.limit + federation.offset;
|
let required_hit_count = federation.limit + federation.offset;
|
||||||
|
|
||||||
// In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic
|
// In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic
|
||||||
// Then in step (3), we'll update its value if there is any semantic search
|
// Then in step (3), we'll update its value if there is any semantic search
|
||||||
let mut semantic_hit_count = None;
|
let mut semantic_hit_count = None;
|
||||||
let mut results_by_index = Vec::with_capacity(queries_by_index.len());
|
let mut results_by_index = Vec::with_capacity(queries_by_index.len());
|
||||||
let mut previous_query_data: Option<(RankingRules, usize, String)> = None;
|
let mut previous_query_data: Option<(RankingRules, usize, String)> = None;
|
||||||
|
|
||||||
|
// remember the order and name of first index for each facet when merging with index settings
|
||||||
|
// to detect if the order is inconsistent for a facet.
|
||||||
|
let mut facet_order: Option<BTreeMap<String, (String, OrderBy)>> = match federation.merge_facets
|
||||||
|
{
|
||||||
|
Some(MergeFacets { .. }) => Some(Default::default()),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
|
||||||
for (index_uid, queries) in queries_by_index {
|
for (index_uid, queries) in queries_by_index {
|
||||||
|
let first_query_index = queries.first().map(|query| query.query_index);
|
||||||
|
|
||||||
let index = match index_scheduler.index(&index_uid) {
|
let index = match index_scheduler.index(&index_uid) {
|
||||||
Ok(index) => index,
|
Ok(index) => index,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
@@ -367,9 +510,8 @@ pub fn perform_federated_search(
                 // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
                 // here the resource not found is not part of the URL.
                 err.code = StatusCode::BAD_REQUEST;
-                if let Some(query) = queries.first() {
-                    err.message =
-                        format!("Inside `.queries[{}]`: {}", query.query_index, err.message);
+                if let Some(query_index) = first_query_index {
+                    err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message);
                 }
                 return Err(err);
             }
|
|||||||
let mut used_negative_operator = false;
|
let mut used_negative_operator = false;
|
||||||
let mut candidates = RoaringBitmap::new();
|
let mut candidates = RoaringBitmap::new();
|
||||||
|
|
||||||
|
let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten();
|
||||||
|
|
||||||
|
// TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries
|
||||||
|
if let Err(mut error) =
|
||||||
|
check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn)
|
||||||
|
{
|
||||||
|
error.message = format!(
|
||||||
|
"Inside `.federation.facetsByIndex.{index_uid}`: {error}{}",
|
||||||
|
if let Some(query_index) = first_query_index {
|
||||||
|
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
|
||||||
|
} else {
|
||||||
|
Default::default()
|
||||||
|
}
|
||||||
|
);
|
||||||
|
return Err(error);
|
||||||
|
}
|
||||||
|
|
||||||
// 2.1. Compute all candidates for each query in the index
|
// 2.1. Compute all candidates for each query in the index
|
||||||
let mut results_by_query = Vec::with_capacity(queries.len());
|
let mut results_by_query = Vec::with_capacity(queries.len());
|
||||||
|
|
||||||
@ -562,34 +721,116 @@ pub fn perform_federated_search(
|
|||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let merged_result = merged_result?;
|
let merged_result = merged_result?;
|
||||||
|
|
||||||
|
let estimated_total_hits = candidates.len() as usize;
|
||||||
|
|
||||||
|
let facets = facets_by_index
|
||||||
|
.map(|facets_by_index| {
|
||||||
|
compute_facet_distribution_stats(
|
||||||
|
&facets_by_index,
|
||||||
|
&index,
|
||||||
|
&rtxn,
|
||||||
|
candidates,
|
||||||
|
super::Route::MultiSearch,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.transpose()
|
||||||
|
.map_err(|mut error| {
|
||||||
|
error.message = format!(
|
||||||
|
"Inside `.federation.facetsByIndex.{index_uid}`: {}{}",
|
||||||
|
error.message,
|
||||||
|
if let Some(query_index) = first_query_index {
|
||||||
|
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
|
||||||
|
} else {
|
||||||
|
Default::default()
|
||||||
|
}
|
||||||
|
);
|
||||||
|
error
|
||||||
|
})?;
|
||||||
|
|
||||||
results_by_index.push(SearchResultByIndex {
|
results_by_index.push(SearchResultByIndex {
|
||||||
|
index: index_uid,
|
||||||
hits: merged_result,
|
hits: merged_result,
|
||||||
candidates,
|
estimated_total_hits,
|
||||||
degraded,
|
degraded,
|
||||||
used_negative_operator,
|
used_negative_operator,
|
||||||
|
facets,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index.
|
||||||
|
for (index_uid, facets) in federation.facets_by_index {
|
||||||
|
let index = match index_scheduler.index(&index_uid) {
|
||||||
|
Ok(index) => index,
|
||||||
|
Err(err) => {
|
||||||
|
let mut err = ResponseError::from(err);
|
||||||
|
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
|
||||||
|
// here the resource not found is not part of the URL.
|
||||||
|
err.code = StatusCode::BAD_REQUEST;
|
||||||
|
err.message = format!(
|
||||||
|
"Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries",
|
||||||
|
err.message
|
||||||
|
);
|
||||||
|
return Err(err);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Important: this is the only transaction we'll use for this index during this federated search
|
||||||
|
let rtxn = index.read_txn()?;
|
||||||
|
|
||||||
|
if let Err(mut error) =
|
||||||
|
check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn)
|
||||||
|
{
|
||||||
|
error.message = format!(
|
||||||
|
"Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries",
|
||||||
|
);
|
||||||
|
return Err(error);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(facets) = facets {
|
||||||
|
if let Err(mut error) = compute_facet_distribution_stats(
|
||||||
|
&facets,
|
||||||
|
&index,
|
||||||
|
&rtxn,
|
||||||
|
Default::default(),
|
||||||
|
super::Route::MultiSearch,
|
||||||
|
) {
|
||||||
|
error.message =
|
||||||
|
format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message);
|
||||||
|
return Err(error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// 3. merge hits and metadata across indexes
|
// 3. merge hits and metadata across indexes
|
||||||
// 3.1 merge metadata
|
// 3.1 merge metadata
|
||||||
let (estimated_total_hits, degraded, used_negative_operator) = {
|
let (estimated_total_hits, degraded, used_negative_operator, facets) = {
|
||||||
let mut estimated_total_hits = 0;
|
let mut estimated_total_hits = 0;
|
||||||
let mut degraded = false;
|
let mut degraded = false;
|
||||||
let mut used_negative_operator = false;
|
let mut used_negative_operator = false;
|
||||||
|
|
||||||
|
let mut facets: FederatedFacets = FederatedFacets::default();
|
||||||
|
|
||||||
for SearchResultByIndex {
|
for SearchResultByIndex {
|
||||||
|
index,
|
||||||
hits: _,
|
hits: _,
|
||||||
candidates,
|
estimated_total_hits: estimated_total_hits_by_index,
|
||||||
|
facets: facets_by_index,
|
||||||
degraded: degraded_by_index,
|
degraded: degraded_by_index,
|
||||||
used_negative_operator: used_negative_operator_by_index,
|
used_negative_operator: used_negative_operator_by_index,
|
||||||
} in &results_by_index
|
} in &mut results_by_index
|
||||||
{
|
{
|
||||||
estimated_total_hits += candidates.len() as usize;
|
estimated_total_hits += *estimated_total_hits_by_index;
|
||||||
degraded |= *degraded_by_index;
|
degraded |= *degraded_by_index;
|
||||||
used_negative_operator |= *used_negative_operator_by_index;
|
used_negative_operator |= *used_negative_operator_by_index;
|
||||||
|
|
||||||
|
let facets_by_index = std::mem::take(facets_by_index);
|
||||||
|
let index = std::mem::take(index);
|
||||||
|
|
||||||
|
facets.insert(index, facets_by_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
(estimated_total_hits, degraded, used_negative_operator)
|
(estimated_total_hits, degraded, used_negative_operator, facets)
|
||||||
};
|
};
|
||||||
|
|
||||||
// 3.2 merge hits
|
// 3.2 merge hits
|
||||||
@ -606,6 +847,20 @@ pub fn perform_federated_search(
|
|||||||
.map(|hit| hit.hit)
|
.map(|hit| hit.hit)
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
let (facet_distribution, facet_stats, facets_by_index) =
|
||||||
|
match federation.merge_facets.zip(facet_order) {
|
||||||
|
Some((merge_facets, facet_order)) => {
|
||||||
|
let facets = facets.merge(merge_facets, facet_order);
|
||||||
|
|
||||||
|
let (facet_distribution, facet_stats) = facets
|
||||||
|
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
|
||||||
|
.unzip();
|
||||||
|
|
||||||
|
(facet_distribution, facet_stats, FederatedFacets::default())
|
||||||
|
}
|
||||||
|
None => (None, None, facets),
|
||||||
|
};
|
||||||
|
|
||||||
let search_result = FederatedSearchResult {
|
let search_result = FederatedSearchResult {
|
||||||
hits: merged_hits,
|
hits: merged_hits,
|
||||||
processing_time_ms: before_search.elapsed().as_millis(),
|
processing_time_ms: before_search.elapsed().as_millis(),
|
||||||
@ -617,7 +872,39 @@ pub fn perform_federated_search(
|
|||||||
semantic_hit_count,
|
semantic_hit_count,
|
||||||
degraded,
|
degraded,
|
||||||
used_negative_operator,
|
used_negative_operator,
|
||||||
|
facet_distribution,
|
||||||
|
facet_stats,
|
||||||
|
facets_by_index,
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(search_result)
|
Ok(search_result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn check_facet_order(
|
||||||
|
facet_order: &mut Option<BTreeMap<String, (String, OrderBy)>>,
|
||||||
|
current_index: &str,
|
||||||
|
facets_by_index: &Option<Vec<String>>,
|
||||||
|
index: &milli::Index,
|
||||||
|
rtxn: &milli::heed::RoTxn<'_>,
|
||||||
|
) -> Result<(), ResponseError> {
|
||||||
|
if let (Some(facet_order), Some(facets_by_index)) = (facet_order, facets_by_index) {
|
||||||
|
let index_facet_order = index.sort_facet_values_by(rtxn)?;
|
||||||
|
for facet in facets_by_index {
|
||||||
|
let index_facet_order = index_facet_order.get(facet);
|
||||||
|
let (previous_index, previous_facet_order) = facet_order
|
||||||
|
.entry(facet.to_owned())
|
||||||
|
.or_insert_with(|| (current_index.to_owned(), index_facet_order));
|
||||||
|
if previous_facet_order != &index_facet_order {
|
||||||
|
return Err(MeilisearchHttpError::InconsistentFacetOrder {
|
||||||
|
facet: facet.clone(),
|
||||||
|
previous_facet_order: *previous_facet_order,
|
||||||
|
previous_uid: previous_index.clone(),
|
||||||
|
current_uid: current_index.to_owned(),
|
||||||
|
index_facet_order,
|
||||||
|
}
|
||||||
|
.into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
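`check_facet_order` enforces a first-wins rule: the first index that mentions a facet pins the expected `OrderBy`, and any later index whose settings disagree aborts the whole federated search with `InconsistentFacetOrder`. A simplified, std-only sketch of the same `entry().or_insert_with()` technique (made-up types and message):

```rust
use std::collections::BTreeMap;

// facet -> (index that fixed the order, the order it fixed)
type Seen = BTreeMap<String, (String, &'static str)>;

fn check_order(seen: &mut Seen, index: &str, facet: &str, order: &'static str) -> Result<(), String> {
    // First index to mention the facet wins; later ones must agree.
    let (first_index, first_order) =
        seen.entry(facet.to_owned()).or_insert_with(|| (index.to_owned(), order));
    if *first_order != order {
        return Err(format!(
            "inconsistent order for `{facet}`: `{first_index}` uses {first_order}, `{index}` uses {order}"
        ));
    }
    Ok(())
}

fn main() {
    let mut seen = Seen::new();
    assert!(check_order(&mut seen, "movies", "genre", "count").is_ok());
    assert!(check_order(&mut seen, "books", "genre", "alpha").is_err()); // order conflict
}
```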
@@ -267,58 +267,54 @@ impl fmt::Debug for SearchQuery {
 pub struct HybridQuery {
     #[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)]
     pub semantic_ratio: SemanticRatio,
-    #[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
-    pub embedder: Option<String>,
+    #[deserr(error = DeserrJsonError<InvalidEmbedder>)]
+    pub embedder: String,
 }

 #[derive(Clone)]
 pub enum SearchKind {
     KeywordOnly,
-    SemanticOnly { embedder_name: String, embedder: Arc<Embedder> },
-    Hybrid { embedder_name: String, embedder: Arc<Embedder>, semantic_ratio: f32 },
+    SemanticOnly { embedder_name: String, embedder: Arc<Embedder>, quantized: bool },
+    Hybrid { embedder_name: String, embedder: Arc<Embedder>, quantized: bool, semantic_ratio: f32 },
 }

 impl SearchKind {
     pub(crate) fn semantic(
         index_scheduler: &index_scheduler::IndexScheduler,
         index: &Index,
-        embedder_name: Option<&str>,
+        embedder_name: &str,
         vector_len: Option<usize>,
     ) -> Result<Self, ResponseError> {
-        let (embedder_name, embedder) =
+        let (embedder_name, embedder, quantized) =
             Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
-        Ok(Self::SemanticOnly { embedder_name, embedder })
+        Ok(Self::SemanticOnly { embedder_name, embedder, quantized })
     }

     pub(crate) fn hybrid(
         index_scheduler: &index_scheduler::IndexScheduler,
         index: &Index,
-        embedder_name: Option<&str>,
+        embedder_name: &str,
         semantic_ratio: f32,
         vector_len: Option<usize>,
     ) -> Result<Self, ResponseError> {
-        let (embedder_name, embedder) =
+        let (embedder_name, embedder, quantized) =
             Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
-        Ok(Self::Hybrid { embedder_name, embedder, semantic_ratio })
+        Ok(Self::Hybrid { embedder_name, embedder, quantized, semantic_ratio })
     }

     pub(crate) fn embedder(
         index_scheduler: &index_scheduler::IndexScheduler,
         index: &Index,
-        embedder_name: Option<&str>,
+        embedder_name: &str,
         vector_len: Option<usize>,
-    ) -> Result<(String, Arc<Embedder>), ResponseError> {
+    ) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
         let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
         let embedders = index_scheduler.embedders(embedder_configs)?;

-        let embedder_name = embedder_name.unwrap_or_else(|| embedders.get_default_embedder_name());
-
-        let embedder = embedders.get(embedder_name);
-
-        let embedder = embedder
+        let (embedder, _, quantized) = embedders
+            .get(embedder_name)
             .ok_or(milli::UserError::InvalidEmbedder(embedder_name.to_owned()))
-            .map_err(milli::Error::from)?
-            .0;
+            .map_err(milli::Error::from)?;

         if let Some(vector_len) = vector_len {
             if vector_len != embedder.dimensions() {
@@ -332,7 +328,7 @@ impl SearchKind {
             }
         }

-        Ok((embedder_name.to_owned(), embedder))
+        Ok((embedder_name.to_owned(), embedder, quantized))
     }
 }

|
|||||||
}
|
}
|
||||||
|
|
||||||
impl SearchQueryWithIndex {
|
impl SearchQueryWithIndex {
|
||||||
pub fn has_federation_options(&self) -> bool {
|
|
||||||
self.federation_options.is_some()
|
|
||||||
}
|
|
||||||
pub fn has_pagination(&self) -> Option<&'static str> {
|
pub fn has_pagination(&self) -> Option<&'static str> {
|
||||||
if self.offset.is_some() {
|
if self.offset.is_some() {
|
||||||
Some("offset")
|
Some("offset")
|
||||||
@ -458,6 +451,10 @@ impl SearchQueryWithIndex {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn has_facets(&self) -> Option<&[String]> {
|
||||||
|
self.facets.as_deref().filter(|v| !v.is_empty())
|
||||||
|
}
|
||||||
|
|
||||||
pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) {
|
pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) {
|
||||||
let SearchQueryWithIndex {
|
let SearchQueryWithIndex {
|
||||||
index_uid,
|
index_uid,
|
||||||
@ -537,8 +534,8 @@ pub struct SimilarQuery {
|
|||||||
pub limit: usize,
|
pub limit: usize,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSimilarFilter>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSimilarFilter>)]
|
||||||
pub filter: Option<Value>,
|
pub filter: Option<Value>,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
|
#[deserr(error = DeserrJsonError<InvalidEmbedder>)]
|
||||||
pub embedder: Option<String>,
|
pub embedder: String,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)]
|
||||||
pub attributes_to_retrieve: Option<BTreeSet<String>>,
|
pub attributes_to_retrieve: Option<BTreeSet<String>>,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSimilarRetrieveVectors>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSimilarRetrieveVectors>)]
|
||||||
@ -792,7 +789,7 @@ fn prepare_search<'t>(
|
|||||||
search.query(q);
|
search.query(q);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
SearchKind::SemanticOnly { embedder_name, embedder } => {
|
SearchKind::SemanticOnly { embedder_name, embedder, quantized } => {
|
||||||
let vector = match query.vector.clone() {
|
let vector = match query.vector.clone() {
|
||||||
Some(vector) => vector,
|
Some(vector) => vector,
|
||||||
None => {
|
None => {
|
||||||
@ -806,14 +803,19 @@ fn prepare_search<'t>(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
search.semantic(embedder_name.clone(), embedder.clone(), Some(vector));
|
search.semantic(embedder_name.clone(), embedder.clone(), *quantized, Some(vector));
|
||||||
}
|
}
|
||||||
SearchKind::Hybrid { embedder_name, embedder, semantic_ratio: _ } => {
|
SearchKind::Hybrid { embedder_name, embedder, quantized, semantic_ratio: _ } => {
|
||||||
if let Some(q) = &query.q {
|
if let Some(q) = &query.q {
|
||||||
search.query(q);
|
search.query(q);
|
||||||
}
|
}
|
||||||
// will be embedded in hybrid search if necessary
|
// will be embedded in hybrid search if necessary
|
||||||
search.semantic(embedder_name.clone(), embedder.clone(), query.vector.clone());
|
search.semantic(
|
||||||
|
embedder_name.clone(),
|
||||||
|
embedder.clone(),
|
||||||
|
*quantized,
|
||||||
|
query.vector.clone(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -987,39 +989,13 @@ pub fn perform_search(
|
|||||||
HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits }
|
HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits }
|
||||||
};
|
};
|
||||||
|
|
||||||
let (facet_distribution, facet_stats) = match facets {
|
let (facet_distribution, facet_stats) = facets
|
||||||
Some(ref fields) => {
|
.map(move |facets| {
|
||||||
let mut facet_distribution = index.facets_distribution(&rtxn);
|
compute_facet_distribution_stats(&facets, index, &rtxn, candidates, Route::Search)
|
||||||
|
})
|
||||||
let max_values_by_facet = index
|
.transpose()?
|
||||||
.max_values_per_facet(&rtxn)
|
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
|
||||||
.map_err(milli::Error::from)?
|
.unzip();
|
||||||
.map(|x| x as usize)
|
|
||||||
.unwrap_or(DEFAULT_VALUES_PER_FACET);
|
|
||||||
facet_distribution.max_values_per_facet(max_values_by_facet);
|
|
||||||
|
|
||||||
let sort_facet_values_by =
|
|
||||||
index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?;
|
|
||||||
|
|
||||||
if fields.iter().all(|f| f != "*") {
|
|
||||||
let fields: Vec<_> =
|
|
||||||
fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect();
|
|
||||||
facet_distribution.facets(fields);
|
|
||||||
}
|
|
||||||
|
|
||||||
let distribution = facet_distribution
|
|
||||||
.candidates(candidates)
|
|
||||||
.default_order_by(sort_facet_values_by.get("*"))
|
|
||||||
.execute()?;
|
|
||||||
let stats = facet_distribution.compute_stats()?;
|
|
||||||
(Some(distribution), Some(stats))
|
|
||||||
}
|
|
||||||
None => (None, None),
|
|
||||||
};
|
|
||||||
|
|
||||||
let facet_stats = facet_stats.map(|stats| {
|
|
||||||
stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect()
|
|
||||||
});
|
|
||||||
|
|
||||||
let result = SearchResult {
|
let result = SearchResult {
|
||||||
hits: documents,
|
hits: documents,
|
||||||
@ -1035,6 +1011,61 @@ pub fn perform_search(
|
|||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Default, Serialize)]
|
||||||
|
pub struct ComputedFacets {
|
||||||
|
pub distribution: BTreeMap<String, IndexMap<String, u64>>,
|
||||||
|
pub stats: BTreeMap<String, FacetStats>,
|
||||||
|
}
|
||||||
|
|
||||||
|
enum Route {
|
||||||
|
Search,
|
||||||
|
MultiSearch,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compute_facet_distribution_stats<S: AsRef<str>>(
|
||||||
|
facets: &[S],
|
||||||
|
index: &Index,
|
||||||
|
rtxn: &RoTxn,
|
||||||
|
candidates: roaring::RoaringBitmap,
|
||||||
|
route: Route,
|
||||||
|
) -> Result<ComputedFacets, ResponseError> {
|
||||||
|
let mut facet_distribution = index.facets_distribution(rtxn);
|
||||||
|
|
||||||
|
let max_values_by_facet = index
|
||||||
|
.max_values_per_facet(rtxn)
|
||||||
|
.map_err(milli::Error::from)?
|
||||||
|
.map(|x| x as usize)
|
||||||
|
.unwrap_or(DEFAULT_VALUES_PER_FACET);
|
||||||
|
|
||||||
|
facet_distribution.max_values_per_facet(max_values_by_facet);
|
||||||
|
|
||||||
|
let sort_facet_values_by = index.sort_facet_values_by(rtxn).map_err(milli::Error::from)?;
|
||||||
|
|
||||||
|
// add specific facet if there is no placeholder
|
||||||
|
if facets.iter().all(|f| f.as_ref() != "*") {
|
||||||
|
let fields: Vec<_> =
|
||||||
|
facets.iter().map(|n| (n, sort_facet_values_by.get(n.as_ref()))).collect();
|
||||||
|
facet_distribution.facets(fields);
|
||||||
|
}
|
||||||
|
|
||||||
|
let distribution = facet_distribution
|
||||||
|
.candidates(candidates)
|
||||||
|
.default_order_by(sort_facet_values_by.get("*"))
|
||||||
|
.execute()
|
||||||
|
.map_err(|error| match (error, route) {
|
||||||
|
(
|
||||||
|
error @ milli::Error::UserError(milli::UserError::InvalidFacetsDistribution {
|
||||||
|
..
|
||||||
|
}),
|
||||||
|
Route::MultiSearch,
|
||||||
|
) => ResponseError::from_msg(error.to_string(), Code::InvalidMultiSearchFacets),
|
||||||
|
(error, _) => error.into(),
|
||||||
|
})?;
|
||||||
|
let stats = facet_distribution.compute_stats()?;
|
||||||
|
let stats = stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect();
|
||||||
|
Ok(ComputedFacets { distribution, stats })
|
||||||
|
}
|
||||||
|
|
||||||
pub fn search_from_kind(
|
pub fn search_from_kind(
|
||||||
search_kind: SearchKind,
|
search_kind: SearchKind,
|
||||||
search: milli::Search<'_>,
|
search: milli::Search<'_>,
|
||||||
@ -1413,6 +1444,7 @@ pub fn perform_similar(
|
|||||||
query: SimilarQuery,
|
query: SimilarQuery,
|
||||||
embedder_name: String,
|
embedder_name: String,
|
||||||
embedder: Arc<Embedder>,
|
embedder: Arc<Embedder>,
|
||||||
|
quantized: bool,
|
||||||
retrieve_vectors: RetrieveVectors,
|
retrieve_vectors: RetrieveVectors,
|
||||||
features: RoFeatures,
|
features: RoFeatures,
|
||||||
) -> Result<SimilarResult, ResponseError> {
|
) -> Result<SimilarResult, ResponseError> {
|
||||||
@ -1441,8 +1473,16 @@ pub fn perform_similar(
|
|||||||
));
|
));
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut similar =
|
let mut similar = milli::Similar::new(
|
||||||
milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder);
|
internal_id,
|
||||||
|
offset,
|
||||||
|
limit,
|
||||||
|
index,
|
||||||
|
&rtxn,
|
||||||
|
embedder_name,
|
||||||
|
embedder,
|
||||||
|
quantized,
|
||||||
|
);
|
||||||
|
|
||||||
if let Some(ref filter) = query.filter {
|
if let Some(ref filter) = query.filter {
|
||||||
if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? {
|
if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? {
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
//! And should drop the Permit only once you have freed all the RAM consumed by the method.
|
//! And should drop the Permit only once you have freed all the RAM consumed by the method.
|
||||||
|
|
||||||
use std::num::NonZeroUsize;
|
use std::num::NonZeroUsize;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
use rand::rngs::StdRng;
|
use rand::rngs::StdRng;
|
||||||
use rand::{Rng, SeedableRng};
|
use rand::{Rng, SeedableRng};
|
||||||
@ -29,16 +30,31 @@ use crate::error::MeilisearchHttpError;
|
|||||||
pub struct SearchQueue {
|
pub struct SearchQueue {
|
||||||
sender: mpsc::Sender<oneshot::Sender<Permit>>,
|
sender: mpsc::Sender<oneshot::Sender<Permit>>,
|
||||||
capacity: usize,
|
capacity: usize,
|
||||||
|
/// If we have waited longer than this to get a permit, we should abort the search request entirely.
|
||||||
|
/// The client probably already closed the connection, but we have no way to find out.
|
||||||
|
time_to_abort: Duration,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// You should only run search requests while holding this permit.
|
/// You should only run search requests while holding this permit.
|
||||||
/// Once it's dropped, a new search request will be able to process.
|
/// Once it's dropped, a new search request will be able to process.
|
||||||
|
/// You should always try to drop the permit yourself calling the `drop` async method on it.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Permit {
|
pub struct Permit {
|
||||||
sender: mpsc::Sender<()>,
|
sender: mpsc::Sender<()>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Permit {
|
||||||
|
/// Drop the permit giving back on permit to the search queue.
|
||||||
|
pub async fn drop(self) {
|
||||||
|
// if the channel is closed then the whole instance is down
|
||||||
|
let _ = self.sender.send(()).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Drop for Permit {
|
impl Drop for Permit {
|
||||||
|
/// The implicit drop implementation can still be called in multiple cases:
|
||||||
|
/// - We forgot to call the explicit one somewhere => this should be fixed on our side asap
|
||||||
|
/// - The future is cancelled while running and the permit dropped with it
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
let sender = self.sender.clone();
|
let sender = self.sender.clone();
|
||||||
// if the channel is closed then the whole instance is down
|
// if the channel is closed then the whole instance is down
|
||||||
@@ -53,7 +69,11 @@ impl SearchQueue {
         let (sender, receiver) = mpsc::channel(1);
 
         tokio::task::spawn(Self::run(capacity, paralellism, receiver));
-        Self { sender, capacity }
+        Self { sender, capacity, time_to_abort: Duration::from_secs(60) }
+    }
+
+    pub fn with_time_to_abort(self, time_to_abort: Duration) -> Self {
+        Self { time_to_abort, ..self }
     }
 
     /// This function is the main loop, it's in charge of scheduling which search request should execute first and
@@ -119,9 +139,23 @@ impl SearchQueue {
     /// Returns a search `Permit`.
     /// It should be dropped as soon as you've freed all the RAM associated with the search request being processed.
     pub async fn try_get_search_permit(&self) -> Result<Permit, MeilisearchHttpError> {
+        let now = std::time::Instant::now();
         let (sender, receiver) = oneshot::channel();
         self.sender.send(sender).await.map_err(|_| MeilisearchHttpError::SearchLimiterIsDown)?;
-        receiver.await.map_err(|_| MeilisearchHttpError::TooManySearchRequests(self.capacity))
+        let permit = receiver
+            .await
+            .map_err(|_| MeilisearchHttpError::TooManySearchRequests(self.capacity))?;
+
+        // If we've been waiting for more than one minute to get a search permit, it's better to simply
+        // abort the search request than to spend time processing something where the client
+        // most certainly exited or got a timeout a long time ago.
+        // We may find a better solution in https://github.com/actix/actix-web/issues/3462.
+        if now.elapsed() > self.time_to_abort {
+            permit.drop().await;
+            Err(MeilisearchHttpError::TooManySearchRequests(self.capacity))
+        } else {
+            Ok(permit)
+        }
     }
 
     /// Returns `Ok(())` if everything seems normal.
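The two hunks above define the whole lifecycle of a permit: a builder-style `with_time_to_abort` override on top of a 60-second default, an explicit async `drop`, and a timed bail-out in `try_get_search_permit`. A minimal usage sketch under those assumptions (the import paths and the surrounding async context are illustrative, not taken from this diff):

use std::num::NonZeroUsize;
use std::time::Duration;

// Sketch only: at most two searches run in parallel, up to four callers may
// queue, and a caller parked for longer than ten seconds is aborted instead
// of being served a search nobody is waiting for anymore.
async fn one_search() -> Result<(), MeilisearchHttpError> {
    let queue = SearchQueue::new(4, NonZeroUsize::new(2).unwrap())
        .with_time_to_abort(Duration::from_secs(10));

    let permit = queue.try_get_search_permit().await?;
    // ... run the RAM-heavy part of the search while holding the permit ...
    permit.drop().await; // prefer the explicit async drop; `impl Drop` is only a fallback
    Ok(())
}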
@@ -11,13 +11,11 @@ use actix_web::http::StatusCode;
 use byte_unit::{Byte, Unit};
 use clap::Parser;
 use meilisearch::option::{IndexerOpts, MaxMemory, MaxThreads, Opt};
-use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer};
+use meilisearch::setup_meilisearch;
 use once_cell::sync::Lazy;
 use tempfile::TempDir;
 use tokio::sync::OnceCell;
 use tokio::time::sleep;
-use tracing::level_filters::LevelFilter;
-use tracing_subscriber::Layer;
 use uuid::Uuid;
 
 use super::index::Index;
@@ -183,7 +181,7 @@ impl Server<Shared> {
         let options = default_settings(dir.path());
 
         let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
-        let service = Service { index_scheduler, auth, options, api_key: None };
+        let service = Service { index_scheduler, auth, api_key: None, options };
 
         Server { service, _dir: Some(dir), _marker: PhantomData }
     }
@@ -263,28 +261,7 @@ impl<State> Server<State> {
         Response = ServiceResponse<impl MessageBody>,
         Error = actix_web::Error,
     > {
-        let (_route_layer, route_layer_handle) =
-            tracing_subscriber::reload::Layer::new(None.with_filter(
-                tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
-            ));
-        let (_stderr_layer, stderr_layer_handle) = tracing_subscriber::reload::Layer::new(
-            (Box::new(
-                tracing_subscriber::fmt::layer()
-                    .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE),
-            )
-                as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>)
-                .with_filter(tracing_subscriber::filter::Targets::new()),
-        );
-
-        actix_web::test::init_service(create_app(
-            self.service.index_scheduler.clone().into(),
-            self.service.auth.clone().into(),
-            self.service.options.clone(),
-            (route_layer_handle, stderr_layer_handle),
-            analytics::MockAnalytics::new(&self.service.options),
-            true,
-        ))
-        .await
+        self.service.init_web_app().await
     }
 
     pub async fn list_api_keys(&self, params: &str) -> (Value, StatusCode) {
@@ -1,10 +1,15 @@
+use std::num::NonZeroUsize;
 use std::sync::Arc;
 
+use actix_web::body::MessageBody;
+use actix_web::dev::ServiceResponse;
 use actix_web::http::header::ContentType;
 use actix_web::http::StatusCode;
 use actix_web::test;
 use actix_web::test::TestRequest;
+use actix_web::web::Data;
 use index_scheduler::IndexScheduler;
+use meilisearch::search_queue::SearchQueue;
 use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer};
 use meilisearch_auth::AuthController;
 use tracing::level_filters::LevelFilter;
@@ -106,7 +111,13 @@ impl Service {
         self.request(req).await
     }
 
-    pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
+    pub async fn init_web_app(
+        &self,
+    ) -> impl actix_web::dev::Service<
+        actix_http::Request,
+        Response = ServiceResponse<impl MessageBody>,
+        Error = actix_web::Error,
+    > {
         let (_route_layer, route_layer_handle) =
             tracing_subscriber::reload::Layer::new(None.with_filter(
                 tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
@@ -119,16 +130,25 @@ impl Service {
             as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>)
                 .with_filter(tracing_subscriber::filter::Targets::new()),
         );
+        let search_queue = SearchQueue::new(
+            self.options.experimental_search_queue_size,
+            NonZeroUsize::new(1).unwrap(),
+        );
 
-        let app = test::init_service(create_app(
+        actix_web::test::init_service(create_app(
             self.index_scheduler.clone().into(),
             self.auth.clone().into(),
+            Data::new(search_queue),
             self.options.clone(),
             (route_layer_handle, stderr_layer_handle),
             analytics::MockAnalytics::new(&self.options),
             true,
         ))
-        .await;
+        .await
+    }
+
+    pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
+        let app = self.init_web_app().await;
 
         if let Some(api_key) = &self.api_key {
             req = req.insert_header(("Authorization", ["Bearer ", api_key].concat()));
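Factoring app construction into `init_web_app` means a test can drive the service directly instead of going through `Service::request`. A sketch of that call pattern (the route and assertion are illustrative, not taken from this diff):

// Sketch: exercising the test app without the request() helper.
let app = server.service.init_web_app().await;
let req = actix_web::test::TestRequest::get().uri("/health").to_request();
let res = actix_web::test::call_service(&app, req).await;
assert!(res.status().is_success());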
@@ -6,6 +6,7 @@ use actix_web::test;
 
 use crate::common::{Server, Value};
 
+#[derive(Debug)]
 enum HttpVerb {
     Put,
     Patch,
@@ -80,7 +81,7 @@ async fn error_json_bad_content_type() {
                 let status_code = res.status();
                 let body = test::read_body(res).await;
                 let response: Value = serde_json::from_slice(&body).unwrap_or_default();
-                assert_eq!(status_code, 415, "calling the route `{}` without content-type is supposed to throw a bad media type error", route);
+                assert_eq!(status_code, 415, "calling the route `{verb:?} {route}` without content-type is supposed to throw a bad media type error:\n{}", String::from_utf8_lossy(&body));
                 assert_eq!(
                     response,
                     json!({
@@ -136,7 +136,7 @@ async fn get_all_documents_bad_filter() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
+      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
       "code": "invalid_document_filter",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@@ -525,7 +525,7 @@ async fn delete_document_by_filter() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(response, @r###"
     {
-      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
+      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
       "code": "invalid_document_filter",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@@ -723,7 +723,7 @@ async fn fetch_document_by_filter() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(response, @r###"
     {
-      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
+      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
       "code": "invalid_document_filter",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@@ -2097,7 +2097,8 @@ async fn generate_and_import_dump_containing_vectors() {
           "source": "huggingFace",
           "model": "sentence-transformers/all-MiniLM-L6-v2",
           "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
-          "documentTemplate": "{{doc.doggo}}"
+          "documentTemplate": "{{doc.doggo}}",
+          "documentTemplateMaxBytes": 400
         }
       },
       "searchCutoffMs": null,
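For context, `documentTemplateMaxBytes` sits in the embedder settings object, so a dump like the one above would come from a settings update along these lines; a sketch only, with the embedder name and the `update_settings` helper assumed from the surrounding test suite:

// Sketch: configuring an embedder with the template size cap surfaced above.
let (response, code) = index
    .update_settings(json!({
        "embedders": {
            "default": {
                "source": "huggingFace",
                "model": "sentence-transformers/all-MiniLM-L6-v2",
                "documentTemplate": "{{doc.doggo}}",
                "documentTemplateMaxBytes": 400
            }
        }
    }))
    .await;
snapshot!(code, @"202 Accepted");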
@@ -1,10 +1,13 @@
 mod error;
 
+use std::num::NonZeroUsize;
 use std::rc::Rc;
 use std::str::FromStr;
 
 use actix_web::http::header::ContentType;
+use actix_web::web::Data;
 use meili_snap::snapshot;
+use meilisearch::search_queue::SearchQueue;
 use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer};
 use tracing::level_filters::LevelFilter;
 use tracing_subscriber::layer::SubscriberExt;
@@ -40,10 +43,15 @@ async fn basic_test_log_stream_route() {
             .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE)
             .with_filter(tracing_subscriber::filter::LevelFilter::from_str("OFF").unwrap()),
     );
+    let search_queue = SearchQueue::new(
+        server.service.options.experimental_search_queue_size,
+        NonZeroUsize::new(1).unwrap(),
+    );
 
     let app = actix_web::test::init_service(create_app(
         server.service.index_scheduler.clone().into(),
         server.service.auth.clone().into(),
+        Data::new(search_queue),
         server.service.options.clone(),
         (route_layer_handle, stderr_layer_handle),
         analytics::MockAnalytics::new(&server.service.options),
@@ -646,7 +646,7 @@ async fn filter_invalid_syntax_object() {
         .search(json!({"filter": "title & Glass"}), |response, code| {
             snapshot!(response, @r###"
             {
-              "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
+              "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
               "code": "invalid_search_filter",
               "type": "invalid_request",
               "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -669,7 +669,7 @@ async fn filter_invalid_syntax_array() {
         .search(json!({"filter": ["title & Glass"]}), |response, code| {
             snapshot!(response, @r###"
             {
-              "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
+              "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
               "code": "invalid_search_filter",
               "type": "invalid_request",
               "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -1163,7 +1163,7 @@ async fn search_with_contains_without_enabling_the_feature() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
+      "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
       "code": "feature_not_enabled",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
@@ -1176,7 +1176,7 @@ async fn search_with_contains_without_enabling_the_feature() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir",
+      "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir",
       "code": "feature_not_enabled",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
@@ -1192,7 +1192,7 @@ async fn search_with_contains_without_enabling_the_feature() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
+      "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
       "code": "feature_not_enabled",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
@@ -1204,7 +1204,7 @@ async fn search_with_contains_without_enabling_the_feature() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
+      "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
       "code": "feature_not_enabled",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
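These four snapshots only exercise the rejection path. A sketch of the opt-in path, assuming the feature is toggled through the same `set_features` helper used elsewhere in this suite and that the payload key is `containsFilter`:

// Sketch: after enabling the experimental feature, both `CONTAINS` and the
// new `STARTS WITH` operator should be accepted in filters.
let (_response, code) = server.set_features(json!({"containsFilter": true})).await;
assert_eq!(code, 200);

index
    .search(json!({"filter": "doggo STARTS WITH kef"}), |_response, code| {
        assert_eq!(code, 200);
    })
    .await;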
@@ -128,7 +128,7 @@ async fn simple_search() {
 
     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -137,7 +137,7 @@ async fn simple_search() {
 
     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true, "retrieveVectors": true}),
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -146,7 +146,7 @@ async fn simple_search() {
 
     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true, "retrieveVectors": true}),
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -161,7 +161,7 @@ async fn limit_offset() {
 
     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}),
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -174,7 +174,7 @@ async fn limit_offset() {
 
     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}),
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -188,8 +188,11 @@ async fn simple_search_hf() {
     let server = Server::new().await;
     let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
 
-    let (response, code) =
-        index.search_post(json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}})).await;
+    let (response, code) = index
+        .search_post(
+            json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2, "embedder": "default"}}),
+        )
+        .await;
     snapshot!(code, @"200 OK");
     snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###);
     snapshot!(response["semanticHitCount"], @"0");
@@ -197,7 +200,7 @@ async fn simple_search_hf() {
     let (response, code) = index
         .search_post(
             // disable ranking score as the vectors between architectures are not equal
-            json!({"q": "Captain", "hybrid": {"semanticRatio": 0.55}, "showRankingScore": false}),
+            json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.55}, "showRankingScore": false}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -206,7 +209,7 @@ async fn simple_search_hf() {
 
     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "hybrid": {"semanticRatio": 0.8}, "showRankingScore": false}),
+            json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.8}, "showRankingScore": false}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -215,7 +218,7 @@ async fn simple_search_hf() {
 
     let (response, code) = index
         .search_post(
-            json!({"q": "Movie World", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
+            json!({"q": "Movie World", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -224,7 +227,7 @@ async fn simple_search_hf() {
 
     let (response, code) = index
         .search_post(
-            json!({"q": "Wonder replacement", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
+            json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -237,7 +240,7 @@ async fn distribution_shift() {
     let server = Server::new().await;
     let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
 
-    let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true});
+    let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true});
     let (response, code) = index.search_post(search.clone()).await;
     snapshot!(code, @"200 OK");
     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
@@ -271,7 +274,7 @@ async fn highlighter() {
 
     let (response, code) = index
         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
-        "hybrid": {"semanticRatio": 0.2},
+        "hybrid": {"embedder": "default", "semanticRatio": 0.2},
         "retrieveVectors": true,
         "attributesToHighlight": [
             "desc",
@@ -287,7 +290,7 @@ async fn highlighter() {
 
     let (response, code) = index
         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
-        "hybrid": {"semanticRatio": 0.8},
+        "hybrid": {"embedder": "default", "semanticRatio": 0.8},
         "retrieveVectors": true,
         "showRankingScore": true,
         "attributesToHighlight": [
@@ -304,7 +307,7 @@ async fn highlighter() {
     // no highlighting on full semantic
     let (response, code) = index
         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
-        "hybrid": {"semanticRatio": 1.0},
+        "hybrid": {"embedder": "default", "semanticRatio": 1.0},
         "retrieveVectors": true,
         "showRankingScore": true,
         "attributesToHighlight": [
@@ -326,7 +329,7 @@ async fn invalid_semantic_ratio() {
 
     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 1.2}}),
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.2}}),
         )
         .await;
     snapshot!(code, @"400 Bad Request");
@@ -341,7 +344,7 @@ async fn invalid_semantic_ratio() {
 
     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": -0.8}}),
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": -0.8}}),
         )
         .await;
     snapshot!(code, @"400 Bad Request");
@@ -357,7 +360,7 @@ async fn invalid_semantic_ratio() {
     let (response, code) = index
         .search_get(
             &yaup::to_string(
-                &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": 1.2}),
+                &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": 1.2}),
             )
             .unwrap(),
         )
@@ -375,7 +378,7 @@ async fn invalid_semantic_ratio() {
     let (response, code) = index
         .search_get(
            &yaup::to_string(
-                &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": -0.2}),
+                &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": -0.2}),
            )
            .unwrap(),
        )
@@ -398,7 +401,7 @@ async fn single_document() {
 
     let (response, code) = index
         .search_post(
-            json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
+            json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
         )
         .await;
 
@@ -414,7 +417,7 @@ async fn query_combination() {
 
     // search without query and vector, but with hybrid => still placeholder
     let (response, code) = index
-        .search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
+        .search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
         .await;
 
     snapshot!(code, @"200 OK");
@@ -423,7 +426,7 @@ async fn query_combination() {
 
     // same with a different semantic ratio
     let (response, code) = index
-        .search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
+        .search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
         .await;
 
     snapshot!(code, @"200 OK");
@@ -432,7 +435,7 @@ async fn query_combination() {
 
     // wrong vector dimensions
     let (response, code) = index
-        .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
+        .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
         .await;
 
     snapshot!(code, @"400 Bad Request");
@@ -447,7 +450,7 @@ async fn query_combination() {
 
     // full vector
     let (response, code) = index
-        .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
+        .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
         .await;
 
     snapshot!(code, @"200 OK");
@@ -456,7 +459,7 @@ async fn query_combination() {
 
     // full keyword, without a query
     let (response, code) = index
-        .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
+        .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
         .await;
 
     snapshot!(code, @"200 OK");
@@ -465,7 +468,7 @@ async fn query_combination() {
 
     // query + vector, full keyword => keyword
     let (response, code) = index
-        .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
+        .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
         .await;
 
     snapshot!(code, @"200 OK");
@@ -480,7 +483,7 @@ async fn query_combination() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(response, @r###"
     {
-      "message": "Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.",
+      "message": "Invalid request: missing `hybrid` parameter when `vector` is present.",
       "code": "missing_search_hybrid",
       "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#missing_search_hybrid"
@@ -490,7 +493,7 @@ async fn query_combination() {
     // full vector, without a vector => error
     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
+            json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
         )
         .await;
 
@@ -507,7 +510,7 @@ async fn query_combination() {
     // hybrid without a vector => full keyword
     let (response, code) = index
         .search_post(
-            json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}),
+            json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
         )
         .await;
 
@@ -523,7 +526,7 @@ async fn retrieve_vectors() {
 
     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
+            json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
         )
         .await;
     snapshot!(code, @"200 OK");
@@ -573,7 +576,7 @@ async fn retrieve_vectors() {
 
     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
+            json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
         )
         .await;
     snapshot!(code, @"200 OK");
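The common thread in all of the hunks above is that a hybrid query must now name its embedder explicitly. A sketch of the two request shapes after this change, with illustrative values:

// POST: the embedder lives inside the `hybrid` object.
let (response, code) = index
    .search_post(json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.5}}))
    .await;

// GET: the flattened form uses a `hybridEmbedder` parameter, mirroring
// `hybridSemanticRatio` in the tests above.
let (response, code) =
    index.search_get("?q=Captain&hybridEmbedder=default&hybridSemanticRatio=0.5").await;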
File diff suppressed because it is too large
@@ -1099,22 +1099,28 @@ async fn experimental_feature_vector_store() {
     index.add_documents(json!(documents), None).await;
     index.wait_task(0).await;
 
-    index
-        .search(json!({
+    let (response, code) = index
+        .search_post(json!({
             "vector": [1.0, 2.0, 3.0],
+            "hybrid": {
+                "embedder": "manual",
+            },
             "showRankingScore": true
-        }), |response, code|{
-            meili_snap::snapshot!(code, @"400 Bad Request");
-            meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
-            {
-              "message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
-              "code": "feature_not_enabled",
-              "type": "invalid_request",
-              "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
-            }
-            "###);
-        })
+        }))
         .await;
+
+    {
+        meili_snap::snapshot!(code, @"400 Bad Request");
+        meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
+        {
+          "message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
+          "code": "feature_not_enabled",
+          "type": "invalid_request",
+          "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
+        }
+        "###);
+    }
+
     index
         .search(json!({
             "retrieveVectors": true,
@@ -1162,6 +1168,9 @@ async fn experimental_feature_vector_store() {
     let (response, code) = index
         .search_post(json!({
             "vector": [1.0, 2.0, 3.0],
+            "hybrid": {
+                "embedder": "manual",
+            },
             "showRankingScore": true,
             "retrieveVectors": true,
         }))
File diff suppressed because it is too large
@@ -37,6 +37,43 @@ async fn search_queue_register() {
         .unwrap();
 }
 
+#[actix_rt::test]
+async fn search_queue_register_with_explicit_drop() {
+    let queue = SearchQueue::new(4, NonZeroUsize::new(2).unwrap());
+
+    // First, use all the cores
+    let permit1 = queue.try_get_search_permit().await.unwrap();
+    let _permit2 = queue.try_get_search_permit().await.unwrap();
+
+    // If we free one spot we should be able to register one new search
+    permit1.drop().await;
+
+    let permit3 = queue.try_get_search_permit().await.unwrap();
+
+    // And again
+    permit3.drop().await;
+
+    let _permit4 = queue.try_get_search_permit().await.unwrap();
+}
+
+#[actix_rt::test]
+async fn search_queue_register_with_time_to_abort() {
+    let queue = Arc::new(
+        SearchQueue::new(1, NonZeroUsize::new(1).unwrap())
+            .with_time_to_abort(Duration::from_secs(1)),
+    );
+
+    // First, use all the cores
+    let permit1 = queue.try_get_search_permit().await.unwrap();
+    let q = queue.clone();
+    let permit2 = tokio::task::spawn(async move { q.try_get_search_permit().await });
+    tokio::time::sleep(Duration::from_secs(1)).await;
+    permit1.drop().await;
+    let ret = permit2.await.unwrap();
+
+    snapshot!(ret.unwrap_err(), @"Too many search requests running at the same time: 1. Retry after 10s.");
+}
+
 #[actix_rt::test]
 async fn wait_till_cores_are_available() {
     let queue = Arc::new(SearchQueue::new(4, NonZeroUsize::new(1).unwrap()));
@@ -190,7 +190,8 @@ async fn secrets_are_hidden_in_settings() {
           "source": "rest",
           "apiKey": "My suXXXXXX...",
           "dimensions": 4,
-          "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
+          "documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
+          "documentTemplateMaxBytes": 400,
           "url": "https://localhost:7777",
           "request": "{{text}}",
           "response": "{{embedding}}",
@@ -18,7 +18,7 @@ async fn similar_unexisting_index() {
     });
 
     index
-        .similar(json!({"id": 287947}), |response, code| {
+        .similar(json!({"id": 287947, "embedder": "manual"}), |response, code| {
             assert_eq!(code, 404);
             assert_eq!(response, expected_response);
         })
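The `/similar` routes get the same treatment: the embedder is now passed explicitly in both call styles. A sketch distilled from the hunks below, using the same test helpers:

// Sketch: `/similar` with an explicit embedder, POST body and GET query string.
let (response, code) =
    index.similar_post(json!({"id": 287947, "embedder": "manual"})).await;

let (response, code) = index.similar_get("?id=287947&embedder=manual").await;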
@ -44,7 +44,7 @@ async fn similar_feature_not_enabled() {
|
|||||||
let server = Server::new().await;
|
let server = Server::new().await;
|
||||||
let index = server.index("test");
|
let index = server.index("test");
|
||||||
|
|
||||||
let (response, code) = index.similar_post(json!({"id": 287947})).await;
|
let (response, code) = index.similar_post(json!({"id": 287947, "embedder": "manual"})).await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(json_string!(response), @r###"
|
snapshot!(json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
@ -199,7 +199,8 @@ async fn similar_not_found_id() {
|
|||||||
snapshot!(code, @"202 Accepted");
|
snapshot!(code, @"202 Accepted");
|
||||||
server.wait_task(response.uid()).await;
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
let (response, code) = index.similar_post(json!({"id": "definitely-doesnt-exist"})).await;
|
let (response, code) =
|
||||||
|
index.similar_post(json!({"id": "definitely-doesnt-exist", "embedder": "manual"})).await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(json_string!(response), @r###"
|
snapshot!(json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
@ -230,7 +231,8 @@ async fn similar_bad_offset() {
|
|||||||
snapshot!(code, @"202 Accepted");
|
snapshot!(code, @"202 Accepted");
|
||||||
server.wait_task(response.uid()).await;
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
let (response, code) = index.similar_post(json!({"id": 287947, "offset": "doggo"})).await;
|
let (response, code) =
|
||||||
|
index.similar_post(json!({"id": 287947, "offset": "doggo", "embedder": "manual"})).await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(json_string!(response), @r###"
|
snapshot!(json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
@ -241,7 +243,7 @@ async fn similar_bad_offset() {
|
|||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
let (response, code) = index.similar_get("?id=287947&offset=doggo").await;
|
let (response, code) = index.similar_get("?id=287947&offset=doggo&embedder=manual").await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(json_string!(response), @r###"
|
snapshot!(json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
@ -272,7 +274,8 @@ async fn similar_bad_limit() {
|
|||||||
snapshot!(code, @"202 Accepted");
|
snapshot!(code, @"202 Accepted");
|
||||||
server.wait_task(response.uid()).await;
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
let (response, code) = index.similar_post(json!({"id": 287947, "limit": "doggo"})).await;
|
let (response, code) =
|
||||||
|
index.similar_post(json!({"id": 287947, "limit": "doggo", "embedder": "manual"})).await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(json_string!(response), @r###"
|
snapshot!(json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
@ -283,7 +286,7 @@ async fn similar_bad_limit() {
|
|||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
let (response, code) = index.similar_get("?id=287946&limit=doggo").await;
|
let (response, code) = index.similar_get("?id=287946&limit=doggo&embedder=manual").await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(json_string!(response), @r###"
|
snapshot!(json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
@ -323,7 +326,8 @@ async fn similar_bad_filter() {
|
|||||||
snapshot!(code, @"202 Accepted");
|
snapshot!(code, @"202 Accepted");
|
||||||
index.wait_task(value.uid()).await;
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
let (response, code) = index.similar_post(json!({ "id": 287947, "filter": true })).await;
|
let (response, code) =
|
||||||
|
index.similar_post(json!({ "id": 287947, "filter": true, "embedder": "manual" })).await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(json_string!(response), @r###"
|
snapshot!(json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
@ -361,10 +365,10 @@ async fn filter_invalid_syntax_object() {
|
|||||||
index.wait_task(value.uid()).await;
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| {
|
.similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| {
|
||||||
snapshot!(response, @r###"
|
snapshot!(response, @r###"
|
||||||
{
|
{
|
||||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
||||||
"code": "invalid_similar_filter",
|
"code": "invalid_similar_filter",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||||
@ -400,10 +404,10 @@ async fn filter_invalid_syntax_array() {
|
|||||||
index.wait_task(value.uid()).await;
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| {
|
.similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| {
|
||||||
snapshot!(response, @r###"
|
snapshot!(response, @r###"
|
||||||
{
|
{
|
||||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
||||||
"code": "invalid_similar_filter",
|
"code": "invalid_similar_filter",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||||
@@ -446,7 +450,7 @@ async fn filter_invalid_syntax_string() {
     });
     index
         .similar(
-            json!({"id": 287947, "filter": "title = Glass XOR title = Glass"}),
+            json!({"id": 287947, "filter": "title = Glass XOR title = Glass", "embedder": "manual"}),
             |response, code| {
                 assert_eq!(response, expected_response);
                 assert_eq!(code, 400);
@@ -486,10 +490,13 @@ async fn filter_invalid_attribute_array() {
         "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(json!({"id": 287947, "filter": ["many = Glass"]}), |response, code| {
-            assert_eq!(response, expected_response);
-            assert_eq!(code, 400);
-        })
+        .similar(
+            json!({"id": 287947, "filter": ["many = Glass"], "embedder": "manual"}),
+            |response, code| {
+                assert_eq!(response, expected_response);
+                assert_eq!(code, 400);
+            },
+        )
         .await;
 }
 
@@ -524,10 +531,13 @@ async fn filter_invalid_attribute_string() {
         "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(json!({"id": 287947, "filter": "many = Glass"}), |response, code| {
-            assert_eq!(response, expected_response);
-            assert_eq!(code, 400);
-        })
+        .similar(
+            json!({"id": 287947, "filter": "many = Glass", "embedder": "manual"}),
+            |response, code| {
+                assert_eq!(response, expected_response);
+                assert_eq!(code, 400);
+            },
+        )
         .await;
 }
 
@@ -562,10 +572,13 @@ async fn filter_reserved_geo_attribute_array() {
         "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(json!({"id": 287947, "filter": ["_geo = Glass"]}), |response, code| {
-            assert_eq!(response, expected_response);
-            assert_eq!(code, 400);
-        })
+        .similar(
+            json!({"id": 287947, "filter": ["_geo = Glass"], "embedder": "manual"}),
+            |response, code| {
+                assert_eq!(response, expected_response);
+                assert_eq!(code, 400);
+            },
+        )
         .await;
 }
 
@@ -600,10 +613,13 @@ async fn filter_reserved_geo_attribute_string() {
         "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(json!({"id": 287947, "filter": "_geo = Glass"}), |response, code| {
-            assert_eq!(response, expected_response);
-            assert_eq!(code, 400);
-        })
+        .similar(
+            json!({"id": 287947, "filter": "_geo = Glass", "embedder": "manual"}),
+            |response, code| {
+                assert_eq!(response, expected_response);
+                assert_eq!(code, 400);
+            },
+        )
         .await;
 }
 
@@ -638,10 +654,13 @@ async fn filter_reserved_attribute_array() {
         "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(json!({"id": 287947, "filter": ["_geoDistance = Glass"]}), |response, code| {
-            assert_eq!(response, expected_response);
-            assert_eq!(code, 400);
-        })
+        .similar(
+            json!({"id": 287947, "filter": ["_geoDistance = Glass"], "embedder": "manual"}),
+            |response, code| {
+                assert_eq!(response, expected_response);
+                assert_eq!(code, 400);
+            },
+        )
         .await;
 }
 
@@ -676,10 +695,13 @@ async fn filter_reserved_attribute_string() {
         "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(json!({"id": 287947, "filter": "_geoDistance = Glass"}), |response, code| {
-            assert_eq!(response, expected_response);
-            assert_eq!(code, 400);
-        })
+        .similar(
+            json!({"id": 287947, "filter": "_geoDistance = Glass", "embedder": "manual"}),
+            |response, code| {
+                assert_eq!(response, expected_response);
+                assert_eq!(code, 400);
+            },
+        )
         .await;
 }
 
@@ -714,10 +736,13 @@ async fn filter_reserved_geo_point_array() {
         "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(json!({"id": 287947, "filter": ["_geoPoint = Glass"]}), |response, code| {
-            assert_eq!(response, expected_response);
-            assert_eq!(code, 400);
-        })
+        .similar(
+            json!({"id": 287947, "filter": ["_geoPoint = Glass"], "embedder": "manual"}),
+            |response, code| {
+                assert_eq!(response, expected_response);
+                assert_eq!(code, 400);
+            },
+        )
         .await;
 }
 
@@ -752,7 +777,13 @@ async fn filter_reserved_geo_point_string() {
         "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
     });
     index
-        .similar(json!({"id": 287947, "filter": "_geoPoint = Glass"}), |response, code| {
-            assert_eq!(response, expected_response);
-            assert_eq!(code, 400);
-        })
+        .similar(
+            json!({"id": 287947, "filter": "_geoPoint = Glass", "embedder": "manual"}),
+            |response, code| {
+                assert_eq!(response, expected_response);
+                assert_eq!(code, 400);
+            },
+        )
         .await;
 }
 
@@ -765,7 +793,8 @@ async fn similar_bad_retrieve_vectors() {
     server.set_features(json!({"vectorStore": true})).await;
     let index = server.index("test");
 
-    let (response, code) = index.similar_post(json!({"retrieveVectors": "doggo"})).await;
+    let (response, code) =
+        index.similar_post(json!({"retrieveVectors": "doggo", "embedder": "manual"})).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
@@ -776,7 +805,8 @@ async fn similar_bad_retrieve_vectors() {
     }
     "###);
 
-    let (response, code) = index.similar_post(json!({"retrieveVectors": [true]})).await;
+    let (response, code) =
+        index.similar_post(json!({"retrieveVectors": [true], "embedder": "manual"})).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
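Every hunk above makes the same mechanical change: a `similar` query must now name the embedder it searches with instead of relying on an implicit default. As a minimal sketch, the body these tests send now looks like this ("manual" is the embedder name configured by the test fixtures; the comments are editorial, not part of the test suite):

    // body passed to index.similar(...) / POST /indexes/:uid/similar
    let body = json!({
        "id": 287947,             // document whose neighbors we want
        "filter": "many = Glass", // unchanged: optional filter expression
        "embedder": "manual",     // new: required embedder name
    });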
@@ -80,9 +80,11 @@ async fn basic() {
     index.wait_task(value.uid()).await;
 
     index
-        .similar(json!({"id": 143, "retrieveVectors": true}), |response, code| {
-            snapshot!(code, @"200 OK");
-            snapshot!(json_string!(response["hits"]), @r###"
+        .similar(
+            json!({"id": 143, "retrieveVectors": true, "embedder": "manual"}),
+            |response, code| {
+                snapshot!(code, @"200 OK");
+                snapshot!(json_string!(response["hits"]), @r###"
     [
       {
         "title": "Escape Room",
@@ -154,13 +156,16 @@ async fn basic() {
       }
     ]
     "###);
-        })
+            },
+        )
         .await;
 
     index
-        .similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| {
-            snapshot!(code, @"200 OK");
-            snapshot!(json_string!(response["hits"]), @r###"
+        .similar(
+            json!({"id": "299537", "retrieveVectors": true, "embedder": "manual"}),
+            |response, code| {
+                snapshot!(code, @"200 OK");
+                snapshot!(json_string!(response["hits"]), @r###"
     [
       {
         "title": "How to Train Your Dragon: The Hidden World",
@@ -232,7 +237,8 @@ async fn basic() {
       }
     ]
     "###);
-        })
+            },
+        )
         .await;
 }
 
@@ -272,7 +278,7 @@ async fn ranking_score_threshold() {
 
     index
         .similar(
-            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true}),
+            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true, "embedder": "manual"}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4");
@@ -358,7 +364,7 @@ async fn ranking_score_threshold() {
 
     index
         .similar(
-            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true}),
+            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true, "embedder": "manual"}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3");
@@ -426,7 +432,7 @@ async fn ranking_score_threshold() {
 
     index
         .similar(
-            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true}),
+            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true, "embedder": "manual"}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2");
@@ -476,7 +482,7 @@ async fn ranking_score_threshold() {
 
     index
         .similar(
-            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true}),
+            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true, "embedder": "manual"}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1");
@@ -508,7 +514,7 @@ async fn ranking_score_threshold() {
 
     index
         .similar(
-            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true}),
+            json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true, "embedder": "manual"}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 snapshot!(json_string!(response["hits"]), @"[]");
@@ -553,7 +559,7 @@ async fn filter() {
 
     index
         .similar(
-            json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}),
+            json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true, "embedder": "manual"}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 snapshot!(json_string!(response["hits"]), @r###"
@@ -617,7 +623,7 @@ async fn filter() {
 
     index
         .similar(
-            json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}),
+            json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true, "embedder": "manual"}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 snapshot!(json_string!(response["hits"]), @r###"
@@ -681,9 +687,11 @@ async fn limit_and_offset() {
     index.wait_task(value.uid()).await;
 
     index
-        .similar(json!({"id": 143, "limit": 1, "retrieveVectors": true}), |response, code| {
-            snapshot!(code, @"200 OK");
-            snapshot!(json_string!(response["hits"]), @r###"
+        .similar(
+            json!({"id": 143, "limit": 1, "retrieveVectors": true, "embedder": "manual"}),
+            |response, code| {
+                snapshot!(code, @"200 OK");
+                snapshot!(json_string!(response["hits"]), @r###"
     [
       {
         "title": "Escape Room",
@@ -704,12 +712,13 @@ async fn limit_and_offset() {
       }
     ]
     "###);
-        })
+            },
+        )
         .await;
 
     index
         .similar(
-            json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}),
+            json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true, "embedder": "manual"}),
             |response, code| {
                 snapshot!(code, @"200 OK");
                 snapshot!(json_string!(response["hits"]), @r###"
380
meilisearch/tests/vector/binary_quantized.rs
Normal file
@@ -0,0 +1,380 @@
+use meili_snap::{json_string, snapshot};
+
+use crate::common::{GetAllDocumentsOptions, Server};
+use crate::json;
+use crate::vector::generate_default_user_provided_documents;
+
+#[actix_rt::test]
+async fn retrieve_binary_quantize_status_in_the_settings() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(value, @r###"
+    {
+      "vectorStore": true,
+      "metrics": false,
+      "logsRoute": false,
+      "editDocumentsByFunction": false,
+      "containsFilter": false
+    }
+    "###);
+
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": {
+              "manual": {
+                  "source": "userProvided",
+                  "dimensions": 3,
+              }
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    let (settings, code) = index.settings().await;
+    snapshot!(code, @"200 OK");
+    snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3}"###);
+
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": {
+              "manual": {
+                  "source": "userProvided",
+                  "dimensions": 3,
+                  "binaryQuantized": false,
+              }
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    let (settings, code) = index.settings().await;
+    snapshot!(code, @"200 OK");
+    snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":false}"###);
+
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": {
+              "manual": {
+                  "source": "userProvided",
+                  "dimensions": 3,
+                  "binaryQuantized": true,
+              }
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    let (settings, code) = index.settings().await;
+    snapshot!(code, @"200 OK");
+    snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":true}"###);
+}
+
+#[actix_rt::test]
+async fn binary_quantize_before_sending_documents() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(value, @r###"
+    {
+      "vectorStore": true,
+      "metrics": false,
+      "logsRoute": false,
+      "editDocumentsByFunction": false,
+      "containsFilter": false
+    }
+    "###);
+
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": {
+              "manual": {
+                  "source": "userProvided",
+                  "dimensions": 3,
+                  "binaryQuantized": true,
+              }
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    let documents = json!([
+      {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }},
+      {"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }},
+    ]);
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(value.uid()).await.succeeded();
+
+    // Make sure the documents are binary quantized
+    let (documents, _code) = index
+        .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
+        .await;
+    snapshot!(json_string!(documents), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "name": "kefir",
+          "_vectors": {
+            "manual": {
+              "embeddings": [
+                [
+                  -1.0,
+                  -1.0,
+                  1.0
+                ]
+              ],
+              "regenerate": false
+            }
+          }
+        },
+        {
+          "id": 1,
+          "name": "echo",
+          "_vectors": {
+            "manual": {
+              "embeddings": [
+                [
+                  1.0,
+                  1.0,
+                  -1.0
+                ]
+              ],
+              "regenerate": false
+            }
+          }
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 2
+    }
+    "###);
+}
+
+#[actix_rt::test]
+async fn binary_quantize_after_sending_documents() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(value, @r###"
+    {
+      "vectorStore": true,
+      "metrics": false,
+      "logsRoute": false,
+      "editDocumentsByFunction": false,
+      "containsFilter": false
+    }
+    "###);
+
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": {
+              "manual": {
+                  "source": "userProvided",
+                  "dimensions": 3,
+              }
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    let documents = json!([
+      {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }},
+      {"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }},
+    ]);
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(value.uid()).await.succeeded();
+
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": {
+              "manual": {
+                  "source": "userProvided",
+                  "dimensions": 3,
+                  "binaryQuantized": true,
+              }
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    // Make sure the documents are binary quantized
+    let (documents, _code) = index
+        .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
+        .await;
+    snapshot!(json_string!(documents), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "name": "kefir",
+          "_vectors": {
+            "manual": {
+              "embeddings": [
+                [
+                  -1.0,
+                  -1.0,
+                  1.0
+                ]
+              ],
+              "regenerate": false
+            }
+          }
+        },
+        {
+          "id": 1,
+          "name": "echo",
+          "_vectors": {
+            "manual": {
+              "embeddings": [
+                [
+                  1.0,
+                  1.0,
+                  -1.0
+                ]
+              ],
+              "regenerate": false
+            }
+          }
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 2
+    }
+    "###);
+}
+
+#[actix_rt::test]
+async fn try_to_disable_binary_quantization() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(value, @r###"
+    {
+      "vectorStore": true,
+      "metrics": false,
+      "logsRoute": false,
+      "editDocumentsByFunction": false,
+      "containsFilter": false
+    }
+    "###);
+
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": {
+              "manual": {
+                  "source": "userProvided",
+                  "dimensions": 3,
+                  "binaryQuantized": true,
+              }
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": {
+              "manual": {
+                  "source": "userProvided",
+                  "dimensions": 3,
+                  "binaryQuantized": false,
+              }
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    let ret = server.wait_task(response.uid()).await;
+    snapshot!(ret, @r###"
+    {
+      "uid": "[uid]",
+      "indexUid": "doggo",
+      "status": "failed",
+      "type": "settingsUpdate",
+      "canceledBy": null,
+      "details": {
+        "embedders": {
+          "manual": {
+            "source": "userProvided",
+            "dimensions": 3,
+            "binaryQuantized": false
+          }
+        }
+      },
+      "error": {
+        "message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.",
+        "code": "invalid_settings_embedders",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+}
+
+#[actix_rt::test]
+async fn binary_quantize_clear_documents() {
+    let server = Server::new().await;
+    let index = generate_default_user_provided_documents(&server).await;
+
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": {
+              "manual": {
+                  "binaryQuantized": true,
+              }
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    let (value, _code) = index.clear_all_documents().await;
+    index.wait_task(value.uid()).await.succeeded();
+
+    // Make sure the documents DB has been cleared
+    let (documents, _code) = index
+        .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
+        .await;
+    snapshot!(json_string!(documents), @r###"
+    {
+      "results": [],
+      "offset": 0,
+      "limit": 20,
+      "total": 0
+    }
+    "###);
+
+    // Make sure the arroy DB has been cleared
+    let (documents, _code) =
+        index.search_post(json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] })).await;
+    snapshot!(documents, @r###"
+    {
+      "hits": [],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 0,
+      "semanticHitCount": 0
+    }
+    "###);
+}
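The expected embeddings in the new file above read naturally once you notice that binary quantization keeps only the sign of each vector component. A tiny sketch of that mapping, inferred from the snapshots (the actual arroy implementation is not shown in this diff, and how it treats exactly 0.0 is an assumption here):

    // [-1.2, -2.3, 3.2] -> [-1.0, -1.0, 1.0]; [2.5, 1.5, -130.0] -> [1.0, 1.0, -1.0]
    fn binary_quantize(v: &[f32]) -> Vec<f32> {
        v.iter().map(|&x| if x < 0.0 { -1.0 } else { 1.0 }).collect()
    }

This also explains why `try_to_disable_binary_quantization` expects a failed task: the original magnitudes are discarded, so the operation is lossy and cannot be reverted.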
@@ -1,3 +1,4 @@
+mod binary_quantized;
 mod openai;
 mod rest;
 mod settings;
@@ -624,7 +625,8 @@ async fn clear_documents() {
     "###);
 
     // Make sure the arroy DB has been cleared
-    let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
+    let (documents, _code) =
+        index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "manual"} })).await;
     snapshot!(documents, @r###"
     {
       "hits": [],
@@ -685,7 +687,11 @@ async fn add_remove_one_vector_4588() {
     let task = index.wait_task(value.uid()).await;
     snapshot!(task, name: "document-deleted");
 
-    let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await;
+    let (documents, _code) = index
+        .search_post(
+            json!({"vector": [1, 1, 1], "hybrid": {"semanticRatio": 1.0, "embedder": "manual"} }),
+        )
+        .await;
     snapshot!(documents, @r###"
     {
       "hits": [
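These two hunks apply to raw vector search the same requirement the `similar` tests picked up earlier: a `"vector"` query must say, through the `hybrid` object, which embedder's vector space it lives in. A minimal request sketch, with names taken from the tests above:

    // body for POST /indexes/:uid/search with a user-provided vector
    let body = json!({
        "vector": [1, 1, 1],
        "hybrid": {
            "embedder": "manual",  // required: embedder to search against
            "semanticRatio": 1.0,  // optional: 1.0 means pure semantic search
        },
    });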
@@ -302,7 +302,8 @@ async fn create_mock_with_template(
         "source": "openAi",
         "url": url,
         "apiKey": API_KEY,
-        "documentTemplate": document_template
+        "documentTemplate": document_template,
+        "documentTemplateMaxBytes": 8000000,
     });
 
     model_dimensions.add_to_settings(&mut embedder_settings);
@@ -448,7 +449,7 @@ async fn it_works() {
     let (response, code) = index
         .search_post(json!({
             "q": "chien de chasse",
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"},
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -488,7 +489,7 @@ async fn it_works() {
     let (response, code) = index
         .search_post(json!({
             "q": "petit chien",
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -528,7 +529,7 @@ async fn it_works() {
     let (response, code) = index
         .search_post(json!({
             "q": "grand chien de berger des montagnes",
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -615,7 +616,7 @@ async fn tokenize_long_text() {
             "q": "grand chien de berger des montagnes",
             "showRankingScore": true,
             "attributesToRetrieve": ["id"],
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -693,6 +694,7 @@ async fn bad_api_key() {
           "model": "text-embedding-3-large",
           "apiKey": "XXX...",
           "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.",
+          "documentTemplateMaxBytes": 8000000,
           "url": "[url]"
         }
       }
@@ -735,6 +737,7 @@ async fn bad_api_key() {
           "source": "openAi",
           "model": "text-embedding-3-large",
           "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.",
+          "documentTemplateMaxBytes": 8000000,
           "url": "[url]"
         }
      }
@@ -1061,7 +1064,7 @@ async fn smaller_dimensions() {
     let (response, code) = index
         .search_post(json!({
             "q": "chien de chasse",
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1101,7 +1104,7 @@ async fn smaller_dimensions() {
     let (response, code) = index
         .search_post(json!({
             "q": "petit chien",
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1141,7 +1144,7 @@ async fn smaller_dimensions() {
     let (response, code) = index
         .search_post(json!({
             "q": "grand chien de berger des montagnes",
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1292,7 +1295,7 @@ async fn small_embedding_model() {
     let (response, code) = index
         .search_post(json!({
             "q": "chien de chasse",
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1332,7 +1335,7 @@ async fn small_embedding_model() {
     let (response, code) = index
         .search_post(json!({
             "q": "petit chien",
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1372,7 +1375,7 @@ async fn small_embedding_model() {
     let (response, code) = index
         .search_post(json!({
             "q": "grand chien de berger des montagnes",
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1522,7 +1525,7 @@ async fn legacy_embedding_model() {
     let (response, code) = index
         .search_post(json!({
             "q": "chien de chasse",
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1562,7 +1565,7 @@ async fn legacy_embedding_model() {
     let (response, code) = index
         .search_post(json!({
             "q": "petit chien",
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1602,7 +1605,7 @@ async fn legacy_embedding_model() {
     let (response, code) = index
         .search_post(json!({
             "q": "grand chien de berger des montagnes",
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1753,7 +1756,7 @@ async fn it_still_works() {
     let (response, code) = index
         .search_post(json!({
             "q": "chien de chasse",
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1793,7 +1796,7 @@ async fn it_still_works() {
     let (response, code) = index
         .search_post(json!({
             "q": "petit chien",
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
         }))
         .await;
     snapshot!(code, @"200 OK");
@@ -1833,7 +1836,7 @@ async fn it_still_works() {
     let (response, code) = index
         .search_post(json!({
             "q": "grand chien de berger des montagnes",
-            "hybrid": {"semanticRatio": 1.0}
+            "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
         }))
         .await;
     snapshot!(code, @"200 OK");
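Two independent changes run through the openai.rs hunks above: every hybrid search now carries `"embedder": "default"`, and the settings snapshots gain a `documentTemplateMaxBytes` field whose default of 8000000 bytes now shows up when settings are dumped. Assuming (it is not spelled out in this diff) that the field caps the size of the rendered document template, the mock's settings after the change look roughly like:

    // url, API_KEY and document_template are fixtures defined earlier in the file
    let mut embedder_settings = json!({
        "source": "openAi",
        "url": url,
        "apiKey": API_KEY,
        "documentTemplate": document_template,
        "documentTemplateMaxBytes": 8000000, // assumed: byte cap on the rendered template
    });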
@@ -1,3 +1,4 @@
+use std::collections::BTreeMap;
 use std::sync::atomic::{AtomicUsize, Ordering};
 
 use meili_snap::{json_string, snapshot};
@@ -37,6 +38,46 @@ async fn create_mock() -> (MockServer, Value) {
     (mock_server, embedder_settings)
 }
 
+async fn create_mock_map() -> (MockServer, Value) {
+    let mock_server = MockServer::start().await;
+
+    let text_to_embedding: BTreeMap<_, _> = vec![
+        // text -> embedding
+        ("name: kefir\n", [0.0, 0.1, 0.2]),
+    ]
+    // turn into btree
+    .into_iter()
+    .collect();
+
+    Mock::given(method("POST"))
+        .and(path("/"))
+        .respond_with(move |req: &Request| {
+            let text: String = req.body_json().unwrap();
+            match text_to_embedding.get(text.as_str()) {
+                Some(embedding) => {
+                    ResponseTemplate::new(200).set_body_json(json!({ "data": embedding }))
+                }
+                None => ResponseTemplate::new(404)
+                    .set_body_json(json!({"error": "text not found", "text": text})),
+            }
+        })
+        .mount(&mock_server)
+        .await;
+    let url = mock_server.uri();
+
+    let embedder_settings = json!({
+        "source": "rest",
+        "url": url,
+        "dimensions": 3,
+        "request": "{{text}}",
+        "response": {
+            "data": "{{embedding}}"
+        }
+    });
+
+    (mock_server, embedder_settings)
+}
+
 #[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
 struct MultipleRequest {
     input: Vec<String>,
@@ -1100,6 +1141,7 @@ async fn server_returns_bad_request() {
 
     let (response, code) = index
         .update_settings(json!({
+          "searchableAttributes": ["name", "missing_field"],
           "embedders": {
              "rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": "{{embedding}}", "dimensions": 3 }),
           },
@@ -1115,6 +1157,10 @@ async fn server_returns_bad_request() {
       "type": "settingsUpdate",
       "canceledBy": null,
      "details": {
+        "searchableAttributes": [
+          "name",
+          "missing_field"
+        ],
        "embedders": {
          "rest": {
            "source": "rest",
@@ -1148,7 +1194,7 @@ async fn server_returns_bad_request() {
        "indexedDocuments": 0
      },
      "error": {
-        "message": "While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\" id: 1\\\\n name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 24\"}`",
+        "message": "While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 15\"}`",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -1891,3 +1937,109 @@ async fn server_custom_header() {
     }
     "###);
 }
+
+#[actix_rt::test]
+async fn searchable_reindex() {
+    let (_mock, setting) = create_mock_map().await;
+    let server = get_server_vector().await;
+    let index = server.index("doggo");
+
+    let (response, code) = index
+        .update_settings(json!({
+          "searchableAttributes": ["name", "missing_field"],
+          "embedders": {
+              "rest": setting,
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    let task = server.wait_task(response.uid()).await;
+    snapshot!(task, @r###"
+    {
+      "uid": "[uid]",
+      "indexUid": "doggo",
+      "status": "succeeded",
+      "type": "settingsUpdate",
+      "canceledBy": null,
+      "details": {
+        "searchableAttributes": [
+          "name",
+          "missing_field"
+        ],
+        "embedders": {
+          "rest": {
+            "source": "rest",
+            "dimensions": 3,
+            "url": "[url]",
+            "request": "{{text}}",
+            "response": {
+              "data": "{{embedding}}"
+            }
+          }
+        }
+      },
+      "error": null,
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    let (response, code) =
+        index.add_documents(json!( { "id": 1, "name": "kefir", "breed": "patou" }), None).await;
+    snapshot!(code, @"202 Accepted");
+    let task = server.wait_task(response.uid()).await;
+    snapshot!(task, @r###"
+    {
+      "uid": "[uid]",
+      "indexUid": "doggo",
+      "status": "succeeded",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 1,
+        "indexedDocuments": 1
+      },
+      "error": null,
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    // triggers reindexing with the new searchable attribute.
+    // as the mock intentionally doesn't know of this text, the task will fail, outputting the putative rendered text.
+    let (response, code) = index
+        .update_settings(json!({
+          "searchableAttributes": ["breed"],
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    let task = server.wait_task(response.uid()).await;
+    snapshot!(task, @r###"
+    {
+      "uid": "[uid]",
+      "indexUid": "doggo",
+      "status": "failed",
+      "type": "settingsUpdate",
+      "canceledBy": null,
+      "details": {
+        "searchableAttributes": [
+          "breed"
+        ]
+      },
+      "error": {
+        "message": "While embedding documents for embedder `rest`: error: received unexpected HTTP 404 from embedding server\n - server replied with `{\"error\":\"text not found\",\"text\":\"breed: patou\\n\"}`",
+        "code": "vector_embedding_error",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+}
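The new `searchable_reindex` test leans on `create_mock_map`: the mock embedder knows exactly one rendered document text, so any reindex that renders something else gets an HTTP 404 back. That is why switching `searchableAttributes` from `["name", "missing_field"]` to `["breed"]` fails. Assuming the default template renders the searchable fields as `field: value` lines (consistent with the error payloads above), the rendered text moves from one the mock knows to one it does not:

    // what the mock can and cannot embed, per the snapshots above
    let known = std::collections::BTreeMap::from([("name: kefir\n", [0.0, 0.1, 0.2])]);
    assert!(known.contains_key("name: kefir\n"));   // searchableAttributes = ["name", "missing_field"]
    assert!(!known.contains_key("breed: patou\n")); // searchableAttributes = ["breed"] -> 404, task fails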
@@ -218,7 +218,8 @@ async fn reset_embedder_documents() {
     "###);
 
     // Make sure the arroy DB has been cleared
-    let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
+    let (documents, _code) =
+        index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "default"} })).await;
     snapshot!(json_string!(documents), @r###"
     {
       "message": "Cannot find embedder with name `default`.",
@@ -15,5 +15,6 @@ dump = { path = "../dump" }
 file-store = { path = "../file-store" }
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
+serde = { version = "1.0.209", features = ["derive"] }
 time = { version = "0.3.36", features = ["formatting"] }
 uuid = { version = "1.10.0", features = ["v4"], default-features = false }
@@ -2,7 +2,7 @@ use std::fs::{read_dir, read_to_string, remove_file, File};
 use std::io::BufWriter;
 use std::path::PathBuf;
 
-use anyhow::Context;
+use anyhow::{bail, Context};
 use clap::{Parser, Subcommand};
 use dump::{DumpWriter, IndexMetadata};
 use file_store::FileStore;
@@ -10,9 +10,10 @@ use meilisearch_auth::AuthController;
 use meilisearch_types::heed::types::{SerdeJson, Str};
 use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
 use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
+use meilisearch_types::milli::index::{db_name, main_key};
 use meilisearch_types::milli::{obkv_to_json, BEU32};
 use meilisearch_types::tasks::{Status, Task};
-use meilisearch_types::versioning::check_version_file;
+use meilisearch_types::versioning::{create_version_file, get_version, parse_version};
 use meilisearch_types::Index;
 use time::macros::format_description;
 use time::OffsetDateTime;
@@ -62,21 +63,458 @@ enum Command {
         #[arg(long)]
         skip_enqueued_tasks: bool,
     },
+
+    /// Attempts to upgrade from one major version to the next without a dump.
+    ///
+    /// Make sure to run this command when Meilisearch is not running!
+    /// If Meilisearch is running while executing this command, the database could be corrupted
+    /// (contain data from both the old and the new versions)
+    ///
+    /// Supported upgrade paths:
+    ///
+    /// - v1.9.0 -> v1.10.0
+    OfflineUpgrade {
+        #[arg(long)]
+        target_version: String,
+    },
 }
 
 fn main() -> anyhow::Result<()> {
     let Cli { db_path, command } = Cli::parse();
 
-    check_version_file(&db_path).context("While checking the version file")?;
+    let detected_version = get_version(&db_path).context("While checking the version file")?;
 
     match command {
         Command::ClearTaskQueue => clear_task_queue(db_path),
         Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
             export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
         }
+        Command::OfflineUpgrade { target_version } => {
+            let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?;
+            OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade()
+        }
     }
 }
+
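Since the subcommand is wired up purely through clap's derive macros, the upgrade would be invoked roughly as sketched below (hedged: the kebab-case naming follows clap's default renaming of `OfflineUpgrade`, and `--db-path` comes from the `Cli` struct, neither of which is spelled out in this hunk):

    // Hypothetical invocation of the new meilitool subcommand:
    //
    //     meilitool --db-path /path/to/data.ms offline-upgrade --target-version 1.10.0
    //
    // Run it only while Meilisearch is stopped, as the doc comment above warns.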
|
struct OfflineUpgrade {
|
||||||
|
db_path: PathBuf,
|
||||||
|
current_version: (String, String, String),
|
||||||
|
target_version: (String, String, String),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl OfflineUpgrade {
|
||||||
|
fn upgrade(self) -> anyhow::Result<()> {
|
||||||
|
// TODO: if we make this process support more versions, introduce a more flexible way of checking for the version
|
||||||
|
// currently only supports v1.9 to v1.10
|
||||||
|
let (current_major, current_minor, current_patch) = &self.current_version;
|
||||||
|
|
||||||
|
match (current_major.as_str(), current_minor.as_str(), current_patch.as_str()) {
|
||||||
|
("1", "9", _) => {}
|
||||||
|
_ => {
|
||||||
|
bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let (target_major, target_minor, target_patch) = &self.target_version;
|
||||||
|
|
||||||
|
match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) {
|
||||||
|
("1", "10", _) => {}
|
||||||
|
_ => {
|
||||||
|
bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("Upgrading from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}");
|
||||||
|
|
||||||
|
self.v1_9_to_v1_10()?;
|
||||||
|
|
||||||
|
println!("Writing VERSION file");
|
||||||
|
|
||||||
|
create_version_file(&self.db_path, target_major, target_minor, target_patch)
|
||||||
|
.context("while writing VERSION file after the upgrade")?;
|
||||||
|
|
||||||
|
println!("Success");
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn v1_9_to_v1_10(&self) -> anyhow::Result<()> {
|
||||||
|
// 2 changes here
|
||||||
|
|
||||||
|
// 1. date format. needs to be done before opening the Index
|
||||||
|
// 2. REST embedders. We don't support this case right now, so bail
|
||||||
|
|
||||||
|
let index_scheduler_path = self.db_path.join("tasks");
|
||||||
|
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
|
||||||
|
.with_context(|| {
|
||||||
|
format!("While trying to open {:?}", index_scheduler_path.display())
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let mut sched_wtxn = env.write_txn()?;
|
||||||
|
|
||||||
|
let index_mapping: Database<Str, UuidCodec> =
|
||||||
|
try_opening_database(&env, &sched_wtxn, "index-mapping")?;
|
||||||
|
|
||||||
|
let index_stats: Database<UuidCodec, Unspecified> =
|
||||||
|
try_opening_database(&env, &sched_wtxn, "index-stats").with_context(|| {
|
||||||
|
format!("While trying to open {:?}", index_scheduler_path.display())
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let index_count =
|
||||||
|
index_mapping.len(&sched_wtxn).context("while reading the number of indexes")?;
|
||||||
|
|
||||||
|
// FIXME: not ideal, we have to pre-populate all indexes to prevent double borrow of sched_wtxn
|
||||||
|
// 1. immutably for the iteration
|
||||||
|
// 2. mutably for updating index stats
|
||||||
|
let indexes: Vec<_> = index_mapping
|
||||||
|
.iter(&sched_wtxn)?
|
||||||
|
.map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid)))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut rest_embedders = Vec::new();
|
||||||
|
|
||||||
|
let mut unwrapped_indexes = Vec::new();
|
||||||
|
|
||||||
|
// check that update can take place
|
||||||
|
for (index_index, result) in indexes.into_iter().enumerate() {
|
||||||
|
let (uid, uuid) = result?;
|
||||||
|
let index_path = self.db_path.join("indexes").join(uuid.to_string());
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"[{}/{index_count}]Checking that update can take place for `{uid}` at `{}`",
|
||||||
|
index_index + 1,
|
||||||
|
index_path.display()
|
||||||
|
);
|
||||||
|
|
||||||
|
let index_env = unsafe {
|
||||||
|
// FIXME: fetch the 25 magic number from the index file
|
||||||
|
EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| {
|
||||||
|
format!("while opening index {uid} at '{}'", index_path.display())
|
||||||
|
})?
|
||||||
|
};
|
||||||
|
|
||||||
|
let index_txn = index_env.read_txn().with_context(|| {
|
||||||
|
format!(
|
||||||
|
"while obtaining a write transaction for index {uid} at {}",
|
||||||
|
index_path.display()
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
println!("\t- Checking for incompatible embedders (REST embedders)");
|
||||||
|
let rest_embedders_for_index = find_rest_embedders(&uid, &index_env, &index_txn)?;
|
||||||
|
|
||||||
|
if rest_embedders_for_index.is_empty() {
|
||||||
|
unwrapped_indexes.push((uid, uuid));
|
||||||
|
} else {
|
||||||
|
// no need to add to unwrapped indexes because we'll exit early
|
||||||
|
rest_embedders.push((uid, rest_embedders_for_index));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !rest_embedders.is_empty() {
|
||||||
|
let rest_embedders = rest_embedders
|
||||||
|
.into_iter()
|
||||||
|
.flat_map(|(index, embedders)| std::iter::repeat(index.clone()).zip(embedders))
|
||||||
|
.map(|(index, embedder)| format!("\t- embedder `{embedder}` in index `{index}`"))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("\n");
|
||||||
|
bail!("The update cannot take place because there are REST embedder(s). Remove them before proceeding with the update:\n{rest_embedders}\n\n\
|
||||||
|
The database has not been modified and is still a valid v1.9 database.");
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("Update can take place, updating");
|
||||||
|
|
||||||
|
for (index_index, (uid, uuid)) in unwrapped_indexes.into_iter().enumerate() {
|
||||||
|
let index_path = self.db_path.join("indexes").join(uuid.to_string());
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"[{}/{index_count}]Updating index `{uid}` at `{}`",
|
||||||
|
index_index + 1,
|
||||||
|
index_path.display()
|
||||||
|
);
|
||||||
|
|
||||||
|
let index_env = unsafe {
|
||||||
|
// FIXME: fetch the 25 magic number from the index file
|
||||||
|
EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| {
|
||||||
|
format!("while opening index {uid} at '{}'", index_path.display())
|
||||||
|
})?
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut index_wtxn = index_env.write_txn().with_context(|| {
|
||||||
|
format!(
|
||||||
|
"while obtaining a write transaction for index `{uid}` at `{}`",
|
||||||
|
index_path.display()
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
println!("\t- Updating index stats");
|
||||||
|
update_index_stats(index_stats, &uid, uuid, &mut sched_wtxn)?;
|
||||||
|
println!("\t- Updating date format");
|
||||||
|
update_date_format(&uid, &index_env, &mut index_wtxn)?;
|
||||||
|
|
||||||
|
index_wtxn.commit().with_context(|| {
|
||||||
|
format!(
|
||||||
|
"while committing the write txn for index `{uid}` at {}",
|
||||||
|
index_path.display()
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
|
||||||
|
sched_wtxn.commit().context("while committing the write txn for the index-scheduler")?;
|
||||||
|
|
||||||
|
println!("Upgrading database succeeded");
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+pub mod v1_9 {
+    pub type FieldDistribution = std::collections::BTreeMap<String, u64>;
+
+    /// The statistics that can be computed from an `Index` object.
+    #[derive(serde::Serialize, serde::Deserialize, Debug)]
+    pub struct IndexStats {
+        /// Number of documents in the index.
+        pub number_of_documents: u64,
+        /// Size taken up by the index' DB, in bytes.
+        ///
+        /// This includes the size taken by both the used and free pages of the DB, and as the free pages
+        /// are not returned to the disk after a deletion, this number is typically larger than
+        /// `used_database_size` that only includes the size of the used pages.
+        pub database_size: u64,
+        /// Size taken by the used pages of the index' DB, in bytes.
+        ///
+        /// As the DB backend does not return to the disk the pages that are not currently used by the DB,
+        /// this value is typically smaller than `database_size`.
+        pub used_database_size: u64,
+        /// Association of every field name with the number of times it occurs in the documents.
+        pub field_distribution: FieldDistribution,
+        /// Creation date of the index.
+        pub created_at: time::OffsetDateTime,
+        /// Date of the last update of the index.
+        pub updated_at: time::OffsetDateTime,
+    }
+
+    use serde::{Deserialize, Serialize};
+
+    #[derive(Debug, Deserialize, Serialize)]
+    pub struct IndexEmbeddingConfig {
+        pub name: String,
+        pub config: EmbeddingConfig,
+    }
+
+    #[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)]
+    pub struct EmbeddingConfig {
+        /// Options of the embedder, specific to each kind of embedder
+        pub embedder_options: EmbedderOptions,
+    }
+
+    /// Options of an embedder, specific to each kind of embedder.
+    #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
+    pub enum EmbedderOptions {
+        HuggingFace(hf::EmbedderOptions),
+        OpenAi(openai::EmbedderOptions),
+        Ollama(ollama::EmbedderOptions),
+        UserProvided(manual::EmbedderOptions),
+        Rest(rest::EmbedderOptions),
+    }
+
+    impl Default for EmbedderOptions {
+        fn default() -> Self {
+            Self::OpenAi(openai::EmbedderOptions { api_key: None, dimensions: None })
+        }
+    }
+
+    mod hf {
+        #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
+        pub struct EmbedderOptions {
+            pub model: String,
+            pub revision: Option<String>,
+        }
+    }
+    mod openai {
+
+        #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
+        pub struct EmbedderOptions {
+            pub api_key: Option<String>,
+            pub dimensions: Option<usize>,
+        }
+    }
+    mod ollama {
+        #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
+        pub struct EmbedderOptions {
+            pub embedding_model: String,
+            pub url: Option<String>,
+            pub api_key: Option<String>,
+        }
+    }
+    mod manual {
+        #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
+        pub struct EmbedderOptions {
+            pub dimensions: usize,
+        }
+    }
+    mod rest {
+        #[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize, serde::Serialize, Hash)]
+        pub struct EmbedderOptions {
+            pub api_key: Option<String>,
+            pub dimensions: Option<usize>,
+            pub url: String,
+            pub input_field: Vec<String>,
+            // path to the array of embeddings
+            pub path_to_embeddings: Vec<String>,
+            // shape of a single embedding
+            pub embedding_object: Vec<String>,
+        }
+    }
+
+    pub type OffsetDateTime = time::OffsetDateTime;
+}
+
+pub mod v1_10 {
+    use crate::v1_9;
+
+    pub type FieldDistribution = std::collections::BTreeMap<String, u64>;
+
+    /// The statistics that can be computed from an `Index` object.
+    #[derive(serde::Serialize, serde::Deserialize, Debug)]
+    pub struct IndexStats {
+        /// Number of documents in the index.
+        pub number_of_documents: u64,
+        /// Size taken up by the index' DB, in bytes.
+        ///
+        /// This includes the size taken by both the used and free pages of the DB, and as the free pages
+        /// are not returned to the disk after a deletion, this number is typically larger than
+        /// `used_database_size` that only includes the size of the used pages.
+        pub database_size: u64,
+        /// Size taken by the used pages of the index' DB, in bytes.
+        ///
+        /// As the DB backend does not return to the disk the pages that are not currently used by the DB,
+        /// this value is typically smaller than `database_size`.
+        pub used_database_size: u64,
+        /// Association of every field name with the number of times it occurs in the documents.
+        pub field_distribution: FieldDistribution,
+        /// Creation date of the index.
+        #[serde(with = "time::serde::rfc3339")]
+        pub created_at: time::OffsetDateTime,
+        /// Date of the last update of the index.
+        #[serde(with = "time::serde::rfc3339")]
+        pub updated_at: time::OffsetDateTime,
+    }
+
+    impl From<v1_9::IndexStats> for IndexStats {
+        fn from(
+            v1_9::IndexStats {
+                number_of_documents,
+                database_size,
+                used_database_size,
+                field_distribution,
+                created_at,
+                updated_at,
+            }: v1_9::IndexStats,
+        ) -> Self {
+            IndexStats {
+                number_of_documents,
+                database_size,
+                used_database_size,
+                field_distribution,
+                created_at,
+                updated_at,
+            }
+        }
+    }
+
+    #[derive(serde::Serialize, serde::Deserialize)]
+    #[serde(transparent)]
+    pub struct OffsetDateTime(#[serde(with = "time::serde::rfc3339")] pub time::OffsetDateTime);
+}
+
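The whole point of the `v1_10` module above is the date format: v1.9 stored `created_at`/`updated_at` with `time`'s default serde representation, while v1.10 expects RFC 3339, hence the transparent wrapper around `time::OffsetDateTime`. A minimal standalone sketch of that wrapper (the name is ours, mirroring `v1_10::OffsetDateTime`; assumes the `time` crate with its `serde`, `formatting` and `parsing` features, plus `serde` and `serde_json`):

    use serde::{Deserialize, Serialize};

    // Transparent wrapper: serializes the inner datetime as an RFC 3339 string.
    #[derive(Serialize, Deserialize)]
    #[serde(transparent)]
    struct Rfc3339DateTime(#[serde(with = "time::serde::rfc3339")] time::OffsetDateTime);

    fn main() -> Result<(), serde_json::Error> {
        let now = time::OffsetDateTime::now_utc();
        // Prints a JSON string such as "2024-08-28T12:34:56.789Z".
        println!("{}", serde_json::to_string(&Rfc3339DateTime(now))?);
        Ok(())
    }

The helpers that follow read each stored date through the v1.9 codec and write it back through this wrapper, which is exactly the round trip `date_round_trip` performs.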
+fn update_index_stats(
+    index_stats: Database<UuidCodec, Unspecified>,
+    index_uid: &str,
+    index_uuid: uuid::Uuid,
+    sched_wtxn: &mut RwTxn,
+) -> anyhow::Result<()> {
+    let ctx = || format!("while updating index stats for index `{index_uid}`");
+
+    let stats: Option<v1_9::IndexStats> = index_stats
+        .remap_data_type::<SerdeJson<v1_9::IndexStats>>()
+        .get(sched_wtxn, &index_uuid)
+        .with_context(ctx)?;
+
+    if let Some(stats) = stats {
+        let stats: v1_10::IndexStats = stats.into();
+
+        index_stats
+            .remap_data_type::<SerdeJson<v1_10::IndexStats>>()
+            .put(sched_wtxn, &index_uuid, &stats)
+            .with_context(ctx)?;
+    }
+
+    Ok(())
+}
+
+fn update_date_format(
+    index_uid: &str,
+    index_env: &Env,
+    index_wtxn: &mut RwTxn,
+) -> anyhow::Result<()> {
+    let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN)
+        .with_context(|| format!("while updating date format for index `{index_uid}`"))?;
+
+    date_round_trip(index_wtxn, index_uid, main, main_key::CREATED_AT_KEY)?;
+    date_round_trip(index_wtxn, index_uid, main, main_key::UPDATED_AT_KEY)?;
+
+    Ok(())
+}
+
+fn find_rest_embedders(
+    index_uid: &str,
+    index_env: &Env,
+    index_txn: &RoTxn,
+) -> anyhow::Result<Vec<String>> {
+    let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN)
+        .with_context(|| format!("while checking REST embedders for index `{index_uid}`"))?;
+
+    let mut rest_embedders = vec![];
+
+    for config in main
+        .remap_types::<Str, SerdeJson<Vec<v1_9::IndexEmbeddingConfig>>>()
+        .get(index_txn, main_key::EMBEDDING_CONFIGS)?
+        .unwrap_or_default()
+    {
+        if let v1_9::EmbedderOptions::Rest(_) = config.config.embedder_options {
+            rest_embedders.push(config.name);
+        }
+    }
+
+    Ok(rest_embedders)
+}
+
+fn date_round_trip(
+    wtxn: &mut RwTxn,
+    index_uid: &str,
+    db: Database<Unspecified, Unspecified>,
+    key: &str,
+) -> anyhow::Result<()> {
+    let datetime =
+        db.remap_types::<Str, SerdeJson<v1_9::OffsetDateTime>>().get(wtxn, key).with_context(
+            || format!("could not read `{key}` while updating date format for index `{index_uid}`"),
+        )?;
+
+    if let Some(datetime) = datetime {
+        db.remap_types::<Str, SerdeJson<v1_10::OffsetDateTime>>()
+            .put(wtxn, key, &v1_10::OffsetDateTime(datetime))
+            .with_context(|| {
+                format!(
+                    "could not write `{key}` while updating date format for index `{index_uid}`"
+                )
+            })?;
+    }
+
+    Ok(())
+}
+
 /// Clears the task queue located at `db_path`.
 fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
     let path = db_path.join("tasks");
@@ -83,7 +83,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
 tiktoken-rs = "0.5.9"
 liquid = "0.26.6"
 rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] }
-arroy = "0.4.0"
+arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
 rand = "0.8.5"
 tracing = "0.1.40"
 ureq = { version = "2.10.0", features = ["json"] }
@@ -108,6 +108,8 @@ all-tokenizations = [
     "charabia/greek",
     "charabia/khmer",
     "charabia/vietnamese",
+    "charabia/swedish-recomposition",
+    "charabia/german-segmentation",
 ]

 # Use POSIX semaphores instead of SysV semaphores in LMDB
@@ -140,6 +142,9 @@ khmer = ["charabia/khmer"]
 # allow vietnamese specialized tokenization
 vietnamese = ["charabia/vietnamese"]

+# allow german specialized tokenization
+german = ["charabia/german-segmentation"]
+
 # force swedish character recomposition
 swedish-recomposition = ["charabia/swedish-recomposition"]

@@ -258,6 +258,12 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
     },
     #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
     InvalidSettingsDimensions { embedder_name: String },
+    #[error(
+        "`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors."
+    )]
+    InvalidDisableBinaryQuantization { embedder_name: String },
+    #[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")]
+    InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String },
     #[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")]
     InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String },
     #[error("Document editions cannot modify a document's primary key")]
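For context on the new `InvalidDisableBinaryQuantization` error: binary quantization in the sign-bit style keeps a single bit per dimension, which is why the operation cannot be reverted once the original floats are discarded. An illustrative sketch of the general technique only, not arroy's actual storage layout:

    // Keep only the sign of each component: one bit per dimension.
    fn binary_quantize(vector: &[f32]) -> Vec<bool> {
        vector.iter().map(|x| *x >= 0.0).collect()
    }

    // There is no inverse: every vector in the same orthant maps to the same
    // bits, hence the hint to add a fresh non-quantized embedder instead.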
@@ -21,7 +21,7 @@ use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
 use crate::order_by_map::OrderByMap;
 use crate::proximity::ProximityPrecision;
 use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
-use crate::vector::{Embedding, EmbeddingConfig};
+use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig};
 use crate::{
     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
     FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
@@ -162,7 +162,7 @@ pub struct Index {
     /// Maps an embedder name to its id in the arroy store.
     pub embedder_category_id: Database<Str, U8>,
     /// Vector store based on arroy™.
-    pub vector_arroy: arroy::Database<arroy::distances::Angular>,
+    pub vector_arroy: arroy::Database<Unspecified>,

     /// Maps the document id to the document as an obkv store.
     pub(crate) documents: Database<BEU32, ObkvCodec>,
@@ -1622,15 +1622,17 @@ impl Index {
         &'a self,
         rtxn: &'a RoTxn<'a>,
         embedder_id: u8,
-    ) -> impl Iterator<Item = Result<arroy::Reader<'a, arroy::distances::Angular>>> + 'a {
+        quantized: bool,
+    ) -> impl Iterator<Item = Result<ArroyWrapper>> + 'a {
         crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
-            arroy::Reader::open(rtxn, k, self.vector_arroy)
-                .map(Some)
-                .or_else(|e| match e {
-                    arroy::Error::MissingMetadata(_) => Ok(None),
-                    e => Err(e.into()),
-                })
-                .transpose()
+            let reader = ArroyWrapper::new(self.vector_arroy, k, quantized);
+            // Here we don't care about the dimensions, but we want to know if we can read
+            // in the database or if its metadata are missing because there is no document with that many vectors.
+            match reader.dimensions(rtxn) {
+                Ok(_) => Some(Ok(reader)),
+                Err(arroy::Error::MissingMetadata(_)) => None,
+                Err(e) => Some(Err(e.into())),
+            }
         })
     }
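The bit-twiddling removed in the next hunk (`(embedder_id as u16) << 8`, `embedder_id | (i as u16)`) encodes the convention behind `arroy_db_range_for_embedder`: each embedder owns a block of 256 consecutive arroy database indexes. A sketch of that convention, reconstructed from the shifts visible in the old code rather than copied from the crate:

    // Each embedder owns 256 consecutive u16 arroy DB indexes.
    fn arroy_db_range_for_embedder(embedder_id: u8) -> impl Iterator<Item = u16> {
        let start = (embedder_id as u16) << 8;
        start..=(start | 0xFF)
    }

With the range hidden behind `arroy_readers`, callers below can simply iterate readers instead of reimplementing the index arithmetic.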
@@ -1652,32 +1654,18 @@ impl Index {
         docid: DocumentId,
     ) -> Result<BTreeMap<String, Vec<Embedding>>> {
         let mut res = BTreeMap::new();
-        for row in self.embedder_category_id.iter(rtxn)? {
-            let (embedder_name, embedder_id) = row?;
-            let embedder_id = (embedder_id as u16) << 8;
-            let mut embeddings = Vec::new();
-            'vectors: for i in 0..=u8::MAX {
-                let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
-                    .map(Some)
-                    .or_else(|e| match e {
-                        arroy::Error::MissingMetadata(_) => Ok(None),
-                        e => Err(e),
-                    })
-                    .transpose();
-
-                let Some(reader) = reader else {
-                    break 'vectors;
-                };
-
-                let embedding = reader?.item_vector(rtxn, docid)?;
-                if let Some(embedding) = embedding {
-                    embeddings.push(embedding)
-                } else {
-                    break 'vectors;
-                }
-            }
-
-            res.insert(embedder_name.to_owned(), embeddings);
+        let embedding_configs = self.embedding_configs(rtxn)?;
+        for config in embedding_configs {
+            let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
+            let embeddings = self
+                .arroy_readers(rtxn, embedder_id, config.config.quantized())
+                .map_while(|reader| {
+                    reader
+                        .and_then(|r| r.item_vector(rtxn, docid).map_err(|e| e.into()))
+                        .transpose()
+                })
+                .collect::<Result<Vec<_>>>()?;
+            res.insert(config.name.to_owned(), embeddings);
         }
         Ok(res)
     }
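The rewritten `embeddings` body above leans on a compact idiom: `map_while(|reader| ... .transpose())` stops at the first reader that has no vector for the document, while still propagating any error met along the way. A minimal, self-contained sketch of the idiom in isolation:

    // `map_while` + `transpose`: stop at the first Ok(None), surface errors.
    fn collect_present<T, E>(
        items: impl Iterator<Item = Result<Option<T>, E>>,
    ) -> Result<Vec<T>, E> {
        items.map_while(|item| item.transpose()).collect()
    }

Here `Ok(None)` becomes `None` and ends the iteration, `Ok(Some(v))` yields `Ok(v)`, and `Err(e)` yields `Err(e)`, which the final `collect::<Result<Vec<_>, _>>()` surfaces.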
@@ -5,7 +5,7 @@ use liquid::{ObjectView, ValueView};

 use super::document::Document;
 use super::fields::Fields;
-use crate::FieldsIdsMap;
+use super::FieldsIdsMapWithMetadata;

 #[derive(Debug, Clone)]
 pub struct Context<'a> {
@@ -14,7 +14,7 @@ pub struct Context<'a> {
 }

 impl<'a> Context<'a> {
-    pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMap) -> Self {
+    pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self {
         Self { document, fields: Fields::new(document, field_id_map) }
     }
 }
@@ -4,16 +4,20 @@ use liquid::model::{
 use liquid::{ObjectView, ValueView};

 use super::document::Document;
-use crate::FieldsIdsMap;
+use super::{FieldMetadata, FieldsIdsMapWithMetadata};
 #[derive(Debug, Clone)]
 pub struct Fields<'a>(Vec<FieldValue<'a>>);

 impl<'a> Fields<'a> {
-    pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMap) -> Self {
+    pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self {
         Self(
             std::iter::repeat(document)
                 .zip(field_id_map.iter())
-                .map(|(document, (_fid, name))| FieldValue { document, name })
+                .map(|(document, (fid, name))| FieldValue {
+                    document,
+                    name,
+                    metadata: field_id_map.metadata(fid).unwrap_or_default(),
+                })
                 .collect(),
         )
     }
@@ -23,6 +27,7 @@ impl<'a> Fields<'a> {
 pub struct FieldValue<'a> {
     name: &'a str,
     document: &'a Document<'a>,
+    metadata: FieldMetadata,
 }

 impl<'a> ValueView for FieldValue<'a> {
@@ -74,6 +79,10 @@ impl<'a> FieldValue<'a> {
         self.document.get(self.name).unwrap_or(&LiquidValue::Nil)
     }

+    pub fn is_searchable(&self) -> &bool {
+        &self.metadata.searchable
+    }
+
     pub fn is_empty(&self) -> bool {
         self.size() == 0
     }
@@ -89,12 +98,14 @@ impl<'a> ObjectView for FieldValue<'a> {
     }

     fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
-        Box::new(["name", "value"].iter().map(|&x| KStringCow::from_static(x)))
+        Box::new(["name", "value", "is_searchable"].iter().map(|&x| KStringCow::from_static(x)))
     }

     fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
         Box::new(
-            std::iter::once(self.name() as &dyn ValueView).chain(std::iter::once(self.value())),
+            std::iter::once(self.name() as &dyn ValueView)
+                .chain(std::iter::once(self.value()))
+                .chain(std::iter::once(self.is_searchable() as &dyn ValueView)),
         )
     }

@@ -103,13 +114,14 @@ impl<'a> ObjectView for FieldValue<'a> {
     }

     fn contains_key(&self, index: &str) -> bool {
-        index == "name" || index == "value"
+        index == "name" || index == "value" || index == "is_searchable"
     }

     fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
         match index {
             "name" => Some(self.name()),
             "value" => Some(self.value()),
+            "is_searchable" => Some(self.is_searchable()),
             _ => None,
         }
     }
@@ -4,28 +4,33 @@ pub(crate) mod error;
 mod fields;
 mod template_checker;

+use std::collections::BTreeMap;
 use std::convert::TryFrom;
+use std::num::NonZeroUsize;
+use std::ops::Deref;

 use error::{NewPromptError, RenderPromptError};

 use self::context::Context;
 use self::document::Document;
 use crate::update::del_add::DelAdd;
-use crate::FieldsIdsMap;
+use crate::{FieldId, FieldsIdsMap};

 pub struct Prompt {
     template: liquid::Template,
     template_text: String,
+    max_bytes: Option<NonZeroUsize>,
 }

 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
 pub struct PromptData {
     pub template: String,
+    pub max_bytes: Option<NonZeroUsize>,
 }

 impl From<Prompt> for PromptData {
     fn from(value: Prompt) -> Self {
-        Self { template: value.template_text }
+        Self { template: value.template_text, max_bytes: value.max_bytes }
     }
 }

@@ -33,14 +38,18 @@ impl TryFrom<PromptData> for Prompt {
     type Error = NewPromptError;

     fn try_from(value: PromptData) -> Result<Self, Self::Error> {
-        Prompt::new(value.template)
+        Prompt::new(value.template, value.max_bytes)
     }
 }

 impl Clone for Prompt {
     fn clone(&self) -> Self {
         let template_text = self.template_text.clone();
-        Self { template: new_template(&template_text).unwrap(), template_text }
+        Self {
+            template: new_template(&template_text).unwrap(),
+            template_text,
+            max_bytes: self.max_bytes,
+        }
     }
 }

@@ -53,25 +62,35 @@ fn default_template() -> liquid::Template {
 }

 fn default_template_text() -> &'static str {
-    "{% for field in fields %} \
+    "{% for field in fields %}\
+    {% if field.is_searchable and field.value != nil %}\
     {{ field.name }}: {{ field.value }}\n\
+    {% endif %}\
     {% endfor %}"
 }

+pub fn default_max_bytes() -> NonZeroUsize {
+    NonZeroUsize::new(400).unwrap()
+}
+
 impl Default for Prompt {
     fn default() -> Self {
-        Self { template: default_template(), template_text: default_template_text().into() }
+        Self {
+            template: default_template(),
+            template_text: default_template_text().into(),
+            max_bytes: Some(default_max_bytes()),
+        }
     }
 }

 impl Default for PromptData {
     fn default() -> Self {
-        Self { template: default_template_text().into() }
+        Self { template: default_template_text().into(), max_bytes: Some(default_max_bytes()) }
     }
 }

 impl Prompt {
-    pub fn new(template: String) -> Result<Self, NewPromptError> {
+    pub fn new(template: String, max_bytes: Option<NonZeroUsize>) -> Result<Self, NewPromptError> {
         let this = Self {
             template: liquid::ParserBuilder::with_stdlib()
                 .build()
@@ -79,6 +98,7 @@ impl Prompt {
                 .parse(&template)
                 .map_err(NewPromptError::cannot_parse_template)?,
             template_text: template,
+            max_bytes,
         };

         // render template with special object that's OK with `doc.*` and `fields.*`
@@ -93,20 +113,72 @@ impl Prompt {
         &self,
         document: &obkv::KvReaderU16,
         side: DelAdd,
-        field_id_map: &FieldsIdsMap,
+        field_id_map: &FieldsIdsMapWithMetadata,
     ) -> Result<String, RenderPromptError> {
         let document = Document::new(document, side, field_id_map);
         let context = Context::new(&document, field_id_map);

-        self.template.render(&context).map_err(RenderPromptError::missing_context)
+        let mut rendered =
+            self.template.render(&context).map_err(RenderPromptError::missing_context)?;
+        if let Some(max_bytes) = self.max_bytes {
+            truncate(&mut rendered, max_bytes.get());
+        }
+        Ok(rendered)
     }
 }

+fn truncate(s: &mut String, max_bytes: usize) {
+    if max_bytes >= s.len() {
+        return;
+    }
+    for i in (0..=max_bytes).rev() {
+        if s.is_char_boundary(i) {
+            s.truncate(i);
+            break;
+        }
+    }
+}
+
+pub struct FieldsIdsMapWithMetadata<'a> {
+    fields_ids_map: &'a FieldsIdsMap,
+    metadata: BTreeMap<FieldId, FieldMetadata>,
+}
+
+impl<'a> FieldsIdsMapWithMetadata<'a> {
+    pub fn new(fields_ids_map: &'a FieldsIdsMap, searchable_fields_ids: &'_ [FieldId]) -> Self {
+        let mut metadata: BTreeMap<FieldId, FieldMetadata> =
+            fields_ids_map.ids().map(|id| (id, Default::default())).collect();
+        for searchable_field_id in searchable_fields_ids {
+            let Some(metadata) = metadata.get_mut(searchable_field_id) else { continue };
+            metadata.searchable = true;
+        }
+        Self { fields_ids_map, metadata }
+    }
+
+    pub fn metadata(&self, field_id: FieldId) -> Option<FieldMetadata> {
+        self.metadata.get(&field_id).copied()
+    }
+}
+
+impl<'a> Deref for FieldsIdsMapWithMetadata<'a> {
+    type Target = FieldsIdsMap;
+
+    fn deref(&self) -> &Self::Target {
+        self.fields_ids_map
+    }
+}
+
+#[derive(Debug, Default, Clone, Copy)]
+pub struct FieldMetadata {
+    pub searchable: bool,
+}
+
 #[cfg(test)]
 mod test {
     use super::Prompt;
     use crate::error::FaultSource;
     use crate::prompt::error::{NewPromptError, NewPromptErrorKind};
+    use crate::prompt::truncate;

     #[test]
     fn default_template() {
@@ -116,18 +188,18 @@ mod test {

     #[test]
     fn empty_template() {
-        Prompt::new("".into()).unwrap();
+        Prompt::new("".into(), None).unwrap();
     }

     #[test]
     fn template_ok() {
-        Prompt::new("{{doc.title}}: {{doc.overview}}".into()).unwrap();
+        Prompt::new("{{doc.title}}: {{doc.overview}}".into(), None).unwrap();
     }

     #[test]
     fn template_syntax() {
         assert!(matches!(
-            Prompt::new("{{doc.title: {{doc.overview}}".into()),
+            Prompt::new("{{doc.title: {{doc.overview}}".into(), None),
             Err(NewPromptError {
                 kind: NewPromptErrorKind::CannotParseTemplate(_),
                 fault: FaultSource::User
@@ -138,7 +210,7 @@ mod test {
     #[test]
     fn template_missing_doc() {
         assert!(matches!(
-            Prompt::new("{{title}}: {{overview}}".into()),
+            Prompt::new("{{title}}: {{overview}}".into(), None),
             Err(NewPromptError {
                 kind: NewPromptErrorKind::InvalidFieldsInTemplate(_),
                 fault: FaultSource::User
@@ -148,29 +220,62 @@ mod test {

     #[test]
     fn template_nested_doc() {
-        Prompt::new("{{doc.actor.firstName}}: {{doc.actor.lastName}}".into()).unwrap();
+        Prompt::new("{{doc.actor.firstName}}: {{doc.actor.lastName}}".into(), None).unwrap();
     }

     #[test]
     fn template_fields() {
-        Prompt::new("{% for field in fields %}{{field}}{% endfor %}".into()).unwrap();
+        Prompt::new("{% for field in fields %}{{field}}{% endfor %}".into(), None).unwrap();
     }

     #[test]
     fn template_fields_ok() {
-        Prompt::new("{% for field in fields %}{{field.name}}: {{field.value}}{% endfor %}".into())
-            .unwrap();
+        Prompt::new(
+            "{% for field in fields %}{{field.name}}: {{field.value}}{% endfor %}".into(),
+            None,
+        )
+        .unwrap();
     }

     #[test]
     fn template_fields_invalid() {
         assert!(matches!(
             // intentionally garbled field
-            Prompt::new("{% for field in fields %}{{field.vaelu}} {% endfor %}".into()),
+            Prompt::new("{% for field in fields %}{{field.vaelu}} {% endfor %}".into(), None),
             Err(NewPromptError {
                 kind: NewPromptErrorKind::InvalidFieldsInTemplate(_),
                 fault: FaultSource::User
             })
         ));
     }
+
+    // todo: test truncation
+    #[test]
+    fn template_truncation() {
+        let mut s = "インテル ザー ビーグル".to_string();
+
+        truncate(&mut s, 42);
+        assert_eq!(s, "インテル ザー ビーグル");
+
+        assert_eq!(s.len(), 32);
+        truncate(&mut s, 32);
+        assert_eq!(s, "インテル ザー ビーグル");
+
+        truncate(&mut s, 31);
+        assert_eq!(s, "インテル ザー ビーグ");
+        truncate(&mut s, 30);
+        assert_eq!(s, "インテル ザー ビーグ");
+        truncate(&mut s, 28);
+        assert_eq!(s, "インテル ザー ビー");
+        truncate(&mut s, 26);
+        assert_eq!(s, "インテル ザー ビー");
+        truncate(&mut s, 25);
+        assert_eq!(s, "インテル ザー ビ");
+
+        assert_eq!("イ".len(), 3);
+        truncate(&mut s, 3);
+        assert_eq!(s, "イ");
+        truncate(&mut s, 2);
+        assert_eq!(s, "");
+    }
 }
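The change to `default_template_text` means the default prompt now renders only searchable, non-nil fields. A toy reproduction of the new template's behavior (a sketch assuming only the `liquid` crate; the field objects mimic the `name`/`value`/`is_searchable` keys that `FieldValue` exposes above):

    fn main() -> Result<(), liquid::Error> {
        let template = liquid::ParserBuilder::with_stdlib().build()?.parse(
            "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
        )?;
        let globals = liquid::object!({
            "fields": [
                { "name": "title", "value": "Dune", "is_searchable": true },
                { "name": "internal_id", "value": 42, "is_searchable": false },
            ]
        });
        // Only the searchable field survives: prints "title: Dune"
        print!("{}", template.render(&globals)?);
        Ok(())
    }

Together with the 400-byte `default_max_bytes` cap applied in `render`, this keeps generated prompts short and focused on searchable content.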
@@ -1,4 +1,5 @@
 use std::collections::{BTreeMap, HashMap, HashSet};
+use std::fmt::Display;
 use std::ops::ControlFlow;
 use std::{fmt, mem};

@@ -37,6 +38,15 @@ pub enum OrderBy {
     Count,
 }

+impl Display for OrderBy {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            OrderBy::Lexicographic => f.write_str("alphabetically"),
+            OrderBy::Count => f.write_str("by count"),
+        }
+    }
+}
+
 pub struct FacetDistribution<'a> {
     facets: Option<HashMap<String, OrderBy>>,
     candidates: Option<RoaringBitmap>,
@@ -100,7 +110,6 @@ impl<'a> FacetDistribution<'a> {
         let mut lexicographic_distribution = BTreeMap::new();
         let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();

-        let distribution_prelength = distribution.len();
         let db = self.index.field_id_docid_facet_f64s;
         for docid in candidates {
             key_buffer.truncate(mem::size_of::<FieldId>());
@@ -113,23 +122,21 @@ impl<'a> FacetDistribution<'a> {
             for result in iter {
                 let ((_, _, value), ()) = result?;
                 *lexicographic_distribution.entry(value.to_string()).or_insert(0) += 1;
-
-                if lexicographic_distribution.len() - distribution_prelength
-                    == self.max_values_per_facet
-                {
-                    break;
-                }
             }
         }

-        distribution.extend(lexicographic_distribution);
+        distribution.extend(
+            lexicographic_distribution
+                .into_iter()
+                .take(self.max_values_per_facet.saturating_sub(distribution.len())),
+        );
     }
     FacetType::String => {
         let mut normalized_distribution = BTreeMap::new();
         let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();

         let db = self.index.field_id_docid_facet_strings;
-        'outer: for docid in candidates {
+        for docid in candidates {
             key_buffer.truncate(mem::size_of::<FieldId>());
             key_buffer.extend_from_slice(&docid.to_be_bytes());
             let iter = db
@@ -144,14 +151,14 @@ impl<'a> FacetDistribution<'a> {
                     .or_insert_with(|| (original_value, 0));
                 *count += 1;

-                if normalized_distribution.len() == self.max_values_per_facet {
-                    break 'outer;
-                }
+                // we'd like to break here if we have enough facet values, but we are collecting them by increasing docid,
+                // so higher ranked facets could be in later docids
             }
         }

         let iter = normalized_distribution
             .into_iter()
+            .take(self.max_values_per_facet.saturating_sub(distribution.len()))
             .map(|(_normalized, (original, count))| (original.to_string(), count));
         distribution.extend(iter);
     }
@@ -467,7 +474,7 @@ mod tests {
         .execute()
         .unwrap();

-    milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 1}}"###);
+    milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2}}"###);

     let map = FacetDistribution::new(&txn, &index)
         .facets(iter::once(("colour", OrderBy::Count)))
@@ -12,7 +12,7 @@ use serde_json::Value;
 use super::facet_range_search;
 use crate::error::{Error, UserError};
 use crate::heed_codec::facet::{
-    FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec,
+    FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec,
 };
 use crate::index::db_name::FACET_ID_STRING_DOCIDS;
 use crate::{
@@ -336,6 +336,24 @@ impl<'a> Filter<'a> {

                 return Ok(docids);
             }
+            Condition::StartsWith { keyword: _, word } => {
+                let value = crate::normalize_facet(word.value());
+                let base = FacetGroupKey { field_id, level: 0, left_bound: value.as_str() };
+                let docids = strings_db
+                    .prefix_iter(rtxn, &base)?
+                    .map(|result| -> Result<RoaringBitmap> {
+                        match result {
+                            Ok((_facet_group_key, FacetGroupValue { bitmap, .. })) => Ok(bitmap),
+                            Err(_e) => Err(InternalError::from(SerializationError::Decoding {
+                                db_name: Some(FACET_ID_STRING_DOCIDS),
+                            })
+                            .into()),
+                        }
+                    })
+                    .union()?;
+
+                return Ok(docids);
+            }
         };

         let mut output = RoaringBitmap::new();
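The new `Condition::StartsWith` arm works by prefix-iterating the level-0 facet strings and unioning their document bitmaps. The same shape in miniature (a sketch assuming the `roaring` crate; the in-memory slice stands in for the heed prefix iterator):

    use roaring::RoaringBitmap;

    fn docids_with_prefix(
        prefix: &str,
        facet_strings: &[(String, RoaringBitmap)], // (normalized value, docids)
    ) -> RoaringBitmap {
        let mut docids = RoaringBitmap::new();
        for (value, bitmap) in facet_strings {
            if value.starts_with(prefix) {
                docids |= bitmap; // union, like `.union()?` over the prefix iterator
            }
        }
        docids
    }

Because the keys are sorted and normalized, every matching value sits in one contiguous key range, which is what makes the prefix iteration efficient.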
@ -190,7 +190,7 @@ impl<'a> Search<'a> {
|
|||||||
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
||||||
};
|
};
|
||||||
// no embedder, no semantic search
|
// no embedder, no semantic search
|
||||||
let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else {
|
let Some(SemanticSearch { vector, embedder_name, embedder, quantized }) = semantic else {
|
||||||
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -212,7 +212,7 @@ impl<'a> Search<'a> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
search.semantic =
|
search.semantic =
|
||||||
Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder });
|
Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder, quantized });
|
||||||
|
|
||||||
// TODO: would be better to have two distinct functions at this point
|
// TODO: would be better to have two distinct functions at this point
|
||||||
let vector_results = search.execute()?;
|
let vector_results = search.execute()?;
|
||||||
|
@ -32,6 +32,7 @@ pub struct SemanticSearch {
|
|||||||
vector: Option<Vec<f32>>,
|
vector: Option<Vec<f32>>,
|
||||||
embedder_name: String,
|
embedder_name: String,
|
||||||
embedder: Arc<Embedder>,
|
embedder: Arc<Embedder>,
|
||||||
|
quantized: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Search<'a> {
|
pub struct Search<'a> {
|
||||||
@ -89,9 +90,10 @@ impl<'a> Search<'a> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
embedder_name: String,
|
embedder_name: String,
|
||||||
embedder: Arc<Embedder>,
|
embedder: Arc<Embedder>,
|
||||||
|
quantized: bool,
|
||||||
vector: Option<Vec<f32>>,
|
vector: Option<Vec<f32>>,
|
||||||
) -> &mut Search<'a> {
|
) -> &mut Search<'a> {
|
||||||
self.semantic = Some(SemanticSearch { embedder_name, embedder, vector });
|
self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector });
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -206,7 +208,7 @@ impl<'a> Search<'a> {
|
|||||||
degraded,
|
degraded,
|
||||||
used_negative_operator,
|
used_negative_operator,
|
||||||
} = match self.semantic.as_ref() {
|
} = match self.semantic.as_ref() {
|
||||||
Some(SemanticSearch { vector: Some(vector), embedder_name, embedder }) => {
|
Some(SemanticSearch { vector: Some(vector), embedder_name, embedder, quantized }) => {
|
||||||
execute_vector_search(
|
execute_vector_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
vector,
|
vector,
|
||||||
@ -219,6 +221,7 @@ impl<'a> Search<'a> {
|
|||||||
self.limit,
|
self.limit,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
embedder,
|
embedder,
|
||||||
|
*quantized,
|
||||||
self.time_budget.clone(),
|
self.time_budget.clone(),
|
||||||
self.ranking_score_threshold,
|
self.ranking_score_threshold,
|
||||||
)?
|
)?
|
||||||
|
@ -312,6 +312,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
|
|||||||
Ok(ranking_rules)
|
Ok(ranking_rules)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
fn get_ranking_rules_for_vector<'ctx>(
|
fn get_ranking_rules_for_vector<'ctx>(
|
||||||
ctx: &SearchContext<'ctx>,
|
ctx: &SearchContext<'ctx>,
|
||||||
sort_criteria: &Option<Vec<AscDesc>>,
|
sort_criteria: &Option<Vec<AscDesc>>,
|
||||||
@ -320,6 +321,7 @@ fn get_ranking_rules_for_vector<'ctx>(
|
|||||||
target: &[f32],
|
target: &[f32],
|
||||||
embedder_name: &str,
|
embedder_name: &str,
|
||||||
embedder: &Embedder,
|
embedder: &Embedder,
|
||||||
|
quantized: bool,
|
||||||
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
|
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
|
||||||
// query graph search
|
// query graph search
|
||||||
|
|
||||||
@ -347,6 +349,7 @@ fn get_ranking_rules_for_vector<'ctx>(
|
|||||||
limit_plus_offset,
|
limit_plus_offset,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
embedder,
|
embedder,
|
||||||
|
quantized,
|
||||||
)?;
|
)?;
|
||||||
ranking_rules.push(Box::new(vector_sort));
|
ranking_rules.push(Box::new(vector_sort));
|
||||||
vector = true;
|
vector = true;
|
||||||
@ -576,6 +579,7 @@ pub fn execute_vector_search(
|
|||||||
length: usize,
|
length: usize,
|
||||||
embedder_name: &str,
|
embedder_name: &str,
|
||||||
embedder: &Embedder,
|
embedder: &Embedder,
|
||||||
|
quantized: bool,
|
||||||
time_budget: TimeBudget,
|
time_budget: TimeBudget,
|
||||||
ranking_score_threshold: Option<f64>,
|
ranking_score_threshold: Option<f64>,
|
||||||
) -> Result<PartialSearchResult> {
|
) -> Result<PartialSearchResult> {
|
||||||
@ -591,6 +595,7 @@ pub fn execute_vector_search(
|
|||||||
vector,
|
vector,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
embedder,
|
embedder,
|
||||||
|
quantized,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let mut placeholder_search_logger = logger::DefaultSearchLogger;
|
let mut placeholder_search_logger = logger::DefaultSearchLogger;
|
||||||
|
@ -16,6 +16,7 @@ pub struct VectorSort<Q: RankingRuleQueryTrait> {
|
|||||||
limit: usize,
|
limit: usize,
|
||||||
distribution_shift: Option<DistributionShift>,
|
distribution_shift: Option<DistributionShift>,
|
||||||
embedder_index: u8,
|
embedder_index: u8,
|
||||||
|
quantized: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||||
@ -26,6 +27,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
|||||||
limit: usize,
|
limit: usize,
|
||||||
embedder_name: &str,
|
embedder_name: &str,
|
||||||
embedder: &Embedder,
|
embedder: &Embedder,
|
||||||
|
quantized: bool,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let embedder_index = ctx
|
let embedder_index = ctx
|
||||||
.index
|
.index
|
||||||
@ -41,6 +43,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
|||||||
limit,
|
limit,
|
||||||
distribution_shift: embedder.distribution(),
|
distribution_shift: embedder.distribution(),
|
||||||
embedder_index,
|
embedder_index,
|
||||||
|
quantized,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -49,16 +52,12 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
|||||||
ctx: &mut SearchContext<'_>,
|
ctx: &mut SearchContext<'_>,
|
||||||
vector_candidates: &RoaringBitmap,
|
vector_candidates: &RoaringBitmap,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let readers: std::result::Result<Vec<_>, _> =
|
|
||||||
ctx.index.arroy_readers(ctx.txn, self.embedder_index).collect();
|
|
||||||
let readers = readers?;
|
|
||||||
|
|
||||||
let target = &self.target;
|
let target = &self.target;
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
|
|
||||||
for reader in readers.iter() {
|
for reader in ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized) {
|
||||||
let nns_by_vector =
|
let nns_by_vector =
|
||||||
reader.nns_by_vector(ctx.txn, target, self.limit, None, Some(vector_candidates))?;
|
reader?.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
|
||||||
results.extend(nns_by_vector.into_iter());
|
results.extend(nns_by_vector.into_iter());
|
||||||
}
|
}
|
||||||
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
|
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
|
||||||
|
@ -18,9 +18,11 @@ pub struct Similar<'a> {
|
|||||||
embedder_name: String,
|
embedder_name: String,
|
||||||
embedder: Arc<Embedder>,
|
embedder: Arc<Embedder>,
|
||||||
ranking_score_threshold: Option<f64>,
|
ranking_score_threshold: Option<f64>,
|
||||||
|
quantized: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Similar<'a> {
|
impl<'a> Similar<'a> {
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub fn new(
|
pub fn new(
|
||||||
id: DocumentId,
|
id: DocumentId,
|
||||||
offset: usize,
|
offset: usize,
|
||||||
@ -29,6 +31,7 @@ impl<'a> Similar<'a> {
|
|||||||
rtxn: &'a heed::RoTxn<'a>,
|
rtxn: &'a heed::RoTxn<'a>,
|
||||||
embedder_name: String,
|
embedder_name: String,
|
||||||
embedder: Arc<Embedder>,
|
embedder: Arc<Embedder>,
|
||||||
|
quantized: bool,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
id,
|
id,
|
||||||
@ -40,6 +43,7 @@ impl<'a> Similar<'a> {
|
|||||||
embedder_name,
|
embedder_name,
|
||||||
embedder,
|
embedder,
|
||||||
ranking_score_threshold: None,
|
ranking_score_threshold: None,
|
||||||
|
quantized,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -67,19 +71,13 @@ impl<'a> Similar<'a> {
|
|||||||
.get(self.rtxn, &self.embedder_name)?
|
.get(self.rtxn, &self.embedder_name)?
|
||||||
.ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?;
|
.ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?;
|
||||||
|
|
||||||
let readers: std::result::Result<Vec<_>, _> =
|
|
||||||
self.index.arroy_readers(self.rtxn, embedder_index).collect();
|
|
||||||
|
|
||||||
let readers = readers?;
|
|
||||||
|
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
|
|
||||||
for reader in readers.iter() {
|
for reader in self.index.arroy_readers(self.rtxn, embedder_index, self.quantized) {
|
||||||
let nns_by_item = reader.nns_by_item(
|
let nns_by_item = reader?.nns_by_item(
|
||||||
self.rtxn,
|
self.rtxn,
|
||||||
self.id,
|
self.id,
|
||||||
self.limit + self.offset + 1,
|
self.limit + self.offset + 1,
|
||||||
None,
|
|
||||||
Some(&universe),
|
Some(&universe),
|
||||||
)?;
|
)?;
|
||||||
if let Some(mut nns_by_item) = nns_by_item {
|
if let Some(mut nns_by_item) = nns_by_item {
|
||||||
|
@ -15,14 +15,14 @@ use serde_json::Value;
|
|||||||
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
|
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
|
||||||
use crate::error::FaultSource;
|
use crate::error::FaultSource;
|
||||||
use crate::index::IndexEmbeddingConfig;
|
use crate::index::IndexEmbeddingConfig;
|
||||||
use crate::prompt::Prompt;
|
use crate::prompt::{FieldsIdsMapWithMetadata, Prompt};
|
||||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||||
use crate::update::settings::InnerIndexSettingsDiff;
|
use crate::update::settings::InnerIndexSettingsDiff;
|
||||||
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
|
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
|
||||||
use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME};
|
use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME};
|
||||||
use crate::vector::settings::{EmbedderAction, ReindexAction};
|
use crate::vector::settings::ReindexAction;
|
||||||
use crate::vector::{Embedder, Embeddings};
|
use crate::vector::{Embedder, Embeddings};
|
||||||
use crate::{try_split_array_at, DocumentId, FieldId, FieldsIdsMap, Result, ThreadPoolNoAbort};
|
use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort};
|
||||||
|
|
||||||
/// The length of the elements that are always in the buffer when inserting new values.
|
/// The length of the elements that are always in the buffer when inserting new values.
|
||||||
const TRUNCATE_SIZE: usize = size_of::<DocumentId>();
|
const TRUNCATE_SIZE: usize = size_of::<DocumentId>();
|
||||||
@ -189,7 +189,13 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
let reindex_vectors = settings_diff.reindex_vectors();
|
let reindex_vectors = settings_diff.reindex_vectors();
|
||||||
|
|
||||||
let old_fields_ids_map = &settings_diff.old.fields_ids_map;
|
let old_fields_ids_map = &settings_diff.old.fields_ids_map;
|
||||||
|
let old_fields_ids_map =
|
||||||
|
FieldsIdsMapWithMetadata::new(old_fields_ids_map, &settings_diff.old.searchable_fields_ids);
|
||||||
|
|
||||||
let new_fields_ids_map = &settings_diff.new.fields_ids_map;
|
let new_fields_ids_map = &settings_diff.new.fields_ids_map;
|
||||||
|
let new_fields_ids_map =
|
||||||
|
FieldsIdsMapWithMetadata::new(new_fields_ids_map, &settings_diff.new.searchable_fields_ids);
|
||||||
|
|
||||||
// the vector field id may have changed
|
// the vector field id may have changed
|
||||||
let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME);
|
let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME);
|
||||||
|
|
||||||
@ -202,65 +208,65 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
|
|
||||||
if reindex_vectors {
|
if reindex_vectors {
|
||||||
for (name, action) in settings_diff.embedding_config_updates.iter() {
|
for (name, action) in settings_diff.embedding_config_updates.iter() {
|
||||||
match action {
|
if let Some(action) = action.reindex() {
|
||||||
EmbedderAction::WriteBackToDocuments(_) => continue, // already deleted
|
let Some((embedder_name, (embedder, prompt, _quantized))) =
|
||||||
EmbedderAction::Reindex(action) => {
|
configs.remove_entry(name)
|
||||||
let Some((embedder_name, (embedder, prompt))) = configs.remove_entry(name)
|
else {
|
||||||
else {
|
tracing::error!(embedder = name, "Requested embedder config not found");
|
||||||
tracing::error!(embedder = name, "Requested embedder config not found");
|
continue;
|
||||||
continue;
|
};
|
||||||
};
|
|
||||||
|
|
||||||
// (docid, _index) -> KvWriterDelAdd -> Vector
|
// (docid, _index) -> KvWriterDelAdd -> Vector
|
||||||
let manual_vectors_writer = create_writer(
|
let manual_vectors_writer = create_writer(
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
tempfile::tempfile()?,
|
tempfile::tempfile()?,
|
||||||
);
|
);
|
||||||
|
|
||||||
// (docid) -> (prompt)
|
// (docid) -> (prompt)
|
||||||
let prompts_writer = create_writer(
|
let prompts_writer = create_writer(
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
tempfile::tempfile()?,
|
tempfile::tempfile()?,
|
||||||
);
|
);
|
||||||
|
|
||||||
// (docid) -> ()
|
// (docid) -> ()
|
||||||
let remove_vectors_writer = create_writer(
|
let remove_vectors_writer = create_writer(
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
                     tempfile::tempfile()?,
                 );

                 let action = match action {
                     ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex,
                     ReindexAction::RegeneratePrompts => {
-                        let Some((_, old_prompt)) = old_configs.get(name) else {
+                        let Some((_, old_prompt, _quantized)) = old_configs.get(name) else {
                             tracing::error!(embedder = name, "Old embedder config not found");
                             continue;
                         };

                         ExtractionAction::SettingsRegeneratePrompts { old_prompt }
                     }
                 };

                 extractors.push(EmbedderVectorExtractor {
                     embedder_name,
                     embedder,
                     prompt,
                     prompts_writer,
                     remove_vectors_writer,
                     manual_vectors_writer,
                     add_to_user_provided: RoaringBitmap::new(),
                     action,
                 });
-            }
+            } else {
+                continue;
             }
         }
     } else {
         // document operation

-        for (embedder_name, (embedder, prompt)) in configs.into_iter() {
+        for (embedder_name, (embedder, prompt, _quantized)) in configs.into_iter() {
             // (docid, _index) -> KvWriterDelAdd -> Vector
             let manual_vectors_writer = create_writer(
                 indexer.chunk_compression_type,
@@ -376,7 +382,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                         );
                         continue;
                     }
-                    regenerate_prompt(obkv, prompt, new_fields_ids_map)?
+                    regenerate_prompt(obkv, prompt, &new_fields_ids_map)?
                 }
             },
             // prompt regeneration is only triggered for existing embedders
@@ -393,7 +399,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                     regenerate_if_prompt_changed(
                         obkv,
                         (old_prompt, prompt),
-                        (old_fields_ids_map, new_fields_ids_map),
+                        (&old_fields_ids_map, &new_fields_ids_map),
                     )?
                 } else {
                     // we can simply ignore user provided vectors as they are not regenerated and are
@@ -409,7 +415,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                 prompt,
                 (add_to_user_provided, remove_from_user_provided),
                 (old, new),
-                (old_fields_ids_map, new_fields_ids_map),
+                (&old_fields_ids_map, &new_fields_ids_map),
                 document_id,
                 embedder_name,
                 embedder_is_manual,
@@ -479,7 +485,10 @@ fn extract_vector_document_diff(
     prompt: &Prompt,
     (add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap),
     (old, new): (VectorState, VectorState),
-    (old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap),
+    (old_fields_ids_map, new_fields_ids_map): (
+        &FieldsIdsMapWithMetadata,
+        &FieldsIdsMapWithMetadata,
+    ),
     document_id: impl Fn() -> Value,
     embedder_name: &str,
     embedder_is_manual: bool,
@@ -599,7 +608,10 @@ fn extract_vector_document_diff(
 fn regenerate_if_prompt_changed(
     obkv: &obkv::KvReader<FieldId>,
     (old_prompt, new_prompt): (&Prompt, &Prompt),
-    (old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap),
+    (old_fields_ids_map, new_fields_ids_map): (
+        &FieldsIdsMapWithMetadata,
+        &FieldsIdsMapWithMetadata,
+    ),
 ) -> Result<VectorStateDelta> {
     let old_prompt =
         old_prompt.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or(Default::default());
@@ -614,7 +626,7 @@ fn regenerate_if_prompt_changed(
 fn regenerate_prompt(
     obkv: &obkv::KvReader<FieldId>,
     prompt: &Prompt,
-    new_fields_ids_map: &FieldsIdsMap,
+    new_fields_ids_map: &FieldsIdsMapWithMetadata,
 ) -> Result<VectorStateDelta> {
     let prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;

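The hunks above thread the prompt diff through the extractor: when embedder settings change, the old and new templates are both rendered and an embedding is only regenerated if the rendered prompt actually differs. A minimal self-contained sketch of that comparison, with closures standing in for `Prompt::render` over an obkv document (not the milli API):

// Sketch only: `render_old`/`render_new` stand in for rendering the stored
// document through the previous and the updated document template.
fn regenerate_if_prompt_changed(
    render_old: impl Fn() -> String,
    render_new: impl Fn() -> String,
) -> bool {
    // Only regenerate the embedding when the rendered prompt changed.
    render_old() != render_new()
}

fn main() {
    let must_regenerate = regenerate_if_prompt_changed(
        || String::from("title: Dune"),
        || String::from("Dune"),
    );
    assert!(must_regenerate);
}
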
@@ -37,7 +37,7 @@ use crate::update::index_documents::parallel::ImmutableObkvs;
 use crate::update::{
     IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
 };
-use crate::vector::EmbeddingConfigs;
+use crate::vector::{ArroyWrapper, EmbeddingConfigs};
 use crate::{CboRoaringBitmapCodec, Index, Object, Result};

 static MERGED_DATABASE_COUNT: usize = 7;
@@ -673,6 +673,24 @@ where
             let number_of_documents = self.index.number_of_documents(self.wtxn)?;
             let mut rng = rand::rngs::StdRng::seed_from_u64(42);

+            // If an embedder wasn't used in the typedchunk but must be binary quantized
+            // we should insert it in `dimension`
+            for (name, action) in settings_diff.embedding_config_updates.iter() {
+                if action.is_being_quantized && !dimension.contains_key(name.as_str()) {
+                    let index = self.index.embedder_category_id.get(self.wtxn, name)?.ok_or(
+                        InternalError::DatabaseMissingEntry {
+                            db_name: "embedder_category_id",
+                            key: None,
+                        },
+                    )?;
+                    let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap();
+                    let reader =
+                        ArroyWrapper::new(self.index.vector_arroy, first_id, action.was_quantized);
+                    let dim = reader.dimensions(self.wtxn)?;
+                    dimension.insert(name.to_string(), dim);
+                }
+            }
+
             for (embedder_name, dimension) in dimension {
                 let wtxn = &mut *self.wtxn;
                 let vector_arroy = self.index.vector_arroy;
@@ -680,13 +698,23 @@ where
                 let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
                     InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
                 )?;
+                let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name);
+                let was_quantized = settings_diff
+                    .old
+                    .embedding_configs
+                    .get(&embedder_name)
+                    .map_or(false, |conf| conf.2);
+                let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized);

                 pool.install(|| {
                     for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
-                        let writer = arroy::Writer::new(vector_arroy, k, dimension);
-                        if writer.need_build(wtxn)? {
-                            writer.build(wtxn, &mut rng, None)?;
-                        } else if writer.is_empty(wtxn)? {
+                        let mut writer = ArroyWrapper::new(vector_arroy, k, was_quantized);
+                        if is_quantizing {
+                            writer.quantize(wtxn, k, dimension)?;
+                        }
+                        if writer.need_build(wtxn, dimension)? {
+                            writer.build(wtxn, &mut rng, dimension)?;
+                        } else if writer.is_empty(wtxn, dimension)? {
                             break;
                         }
                     }
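The hunk above routes index builds through `ArroyWrapper`, so an embedder can be migrated to binary quantization in place before its trees are (re)built, and the migration is one-way. A minimal sketch of that decision flow, assuming a simplified `Store` stand-in rather than the real wrapper:

// `Store` stands in for ArroyWrapper, which wraps an f32 and a binary
// quantized arroy database behind one handle; method bodies are stubs.
struct Store {
    quantized: bool,
}

impl Store {
    fn quantize(&mut self) {
        if !self.quantized {
            // the real code prepares a distance change to BinaryQuantizedAngular
            self.quantized = true;
        }
    }
    fn need_build(&self) -> bool {
        true
    }
    fn build(&self) { /* rebuild the ANN trees */ }
}

fn finish_embedder(store: &mut Store, is_quantizing: bool) {
    // Quantization, when requested, is applied before the (re)build.
    if is_quantizing {
        store.quantize();
    }
    if store.need_build() {
        store.build();
    }
}

fn main() {
    let mut store = Store { quantized: false };
    finish_embedder(&mut store, true);
    assert!(store.quantized);
}
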
@@ -2734,11 +2762,13 @@ mod tests {
                 api_key: Setting::NotSet,
                 dimensions: Setting::Set(3),
                 document_template: Setting::NotSet,
+                document_template_max_bytes: Setting::NotSet,
                 url: Setting::NotSet,
                 request: Setting::NotSet,
                 response: Setting::NotSet,
                 distribution: Setting::NotSet,
                 headers: Setting::NotSet,
+                binary_quantized: Setting::NotSet,
             }),
         );
         settings.set_embedder_settings(embedders);
@@ -2767,7 +2797,7 @@ mod tests {
             std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap());
         let res = index
             .search(&rtxn)
-            .semantic(embedder_name, embedder, Some([0.0, 1.0, 2.0].to_vec()))
+            .semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec()))
             .execute()
             .unwrap();
         assert_eq!(res.documents_ids.len(), 3);

@@ -29,7 +29,8 @@ use crate::update::index_documents::GrenadParameters;
 use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
 use crate::update::{AvailableIds, UpdateIndexingStep};
 use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
-use crate::vector::settings::{EmbedderAction, WriteBackToDocuments};
+use crate::vector::settings::WriteBackToDocuments;
+use crate::vector::ArroyWrapper;
 use crate::{
     is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
 };
@@ -992,19 +993,17 @@ impl<'a, 'i> Transform<'a, 'i> {
             None
         };

-        let readers: Result<
-            BTreeMap<&str, (Vec<arroy::Reader<'_, arroy::distances::Angular>>, &RoaringBitmap)>,
-        > = settings_diff
+        let readers: Result<BTreeMap<&str, (Vec<ArroyWrapper>, &RoaringBitmap)>> = settings_diff
             .embedding_config_updates
             .iter()
             .filter_map(|(name, action)| {
-                if let EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
-                    embedder_id,
-                    user_provided,
-                }) = action
+                if let Some(WriteBackToDocuments { embedder_id, user_provided }) =
+                    action.write_back()
                 {
-                    let readers: Result<Vec<_>> =
-                        self.index.arroy_readers(wtxn, *embedder_id).collect();
+                    let readers: Result<Vec<_>> = self
+                        .index
+                        .arroy_readers(wtxn, *embedder_id, action.was_quantized)
+                        .collect();
                     match readers {
                         Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))),
                         Err(error) => Some(Err(error)),
@@ -1107,23 +1106,14 @@ impl<'a, 'i> Transform<'a, 'i> {
             }
         }

-        let mut writers = Vec::new();
-
         // delete all vectors from the embedders that need removal
         for (_, (readers, _)) in readers {
             for reader in readers {
-                let dimensions = reader.dimensions();
-                let arroy_index = reader.index();
-                drop(reader);
-                let writer = arroy::Writer::new(self.index.vector_arroy, arroy_index, dimensions);
-                writers.push(writer);
+                let dimensions = reader.dimensions(wtxn)?;
+                reader.clear(wtxn, dimensions)?;
             }
         }

-        for writer in writers {
-            writer.clear(wtxn)?;
-        }
-
         let grenad_params = GrenadParameters {
             chunk_compression_type: self.indexer_settings.chunk_compression_type,
             chunk_compression_level: self.indexer_settings.chunk_compression_level,
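The hunk above collapses a two-pass dance (collect readers, convert each to a writer, then clear) into one pass: since `ArroyWrapper` owns both read and write paths, each embedder's vectors can be cleared directly while iterating. A sketch of the one-pass shape, with a simplified stand-in type:

// `VectorStore` stands in for ArroyWrapper; methods are stubs, and the real
// code passes LMDB transactions and propagates errors with `?`.
struct VectorStore;

impl VectorStore {
    fn dimensions(&self) -> usize {
        768
    }
    fn clear(&self, _dimensions: usize) { /* wipe this embedder's vectors */ }
}

fn clear_write_back_stores(stores: Vec<VectorStore>) {
    for store in stores {
        let dimensions = store.dimensions();
        store.clear(dimensions); // no intermediate writer collection needed
    }
}

fn main() {
    clear_write_back_stores(vec![VectorStore, VectorStore]);
}
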
@@ -26,6 +26,7 @@ use crate::update::index_documents::helpers::{
     as_cloneable_grenad, try_split_array_at, KeepLatestObkv,
 };
 use crate::update::settings::InnerIndexSettingsDiff;
+use crate::vector::ArroyWrapper;
 use crate::{
     lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
     Result, SerializationError, U8StrStrCodec,
@@ -661,9 +662,14 @@ pub(crate) fn write_typed_chunk_into_index(
             let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
                 InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
             )?;
+            let binary_quantized = settings_diff
+                .old
+                .embedding_configs
+                .get(&embedder_name)
+                .map_or(false, |conf| conf.2);
             // FIXME: allow customizing distance
             let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index)
-                .map(|k| arroy::Writer::new(index.vector_arroy, k, expected_dimension))
+                .map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized))
                 .collect();

             // remove vectors for docids we want them removed
@@ -674,7 +680,7 @@ pub(crate) fn write_typed_chunk_into_index(

                 for writer in &writers {
                     // Uses invariant: vectors are packed in the first writers.
-                    if !writer.del_item(wtxn, docid)? {
+                    if !writer.del_item(wtxn, expected_dimension, docid)? {
                         break;
                     }
                 }
@@ -706,7 +712,7 @@ pub(crate) fn write_typed_chunk_into_index(
                     )));
                 }
                 for (embedding, writer) in embeddings.iter().zip(&writers) {
-                    writer.add_item(wtxn, docid, embedding)?;
+                    writer.add_item(wtxn, expected_dimension, docid, embedding)?;
                 }
             }

@@ -729,7 +735,7 @@ pub(crate) fn write_typed_chunk_into_index(
                         break;
                     };
                     if candidate == vector {
-                        writer.del_item(wtxn, docid)?;
+                        writer.del_item(wtxn, expected_dimension, docid)?;
                         deleted_index = Some(index);
                     }
                 }
@@ -746,8 +752,13 @@ pub(crate) fn write_typed_chunk_into_index(
                 if let Some((last_index, vector)) = last_index_with_a_vector {
                     // unwrap: computed the index from the list of writers
                     let writer = writers.get(last_index).unwrap();
-                    writer.del_item(wtxn, docid)?;
-                    writers.get(deleted_index).unwrap().add_item(wtxn, docid, &vector)?;
+                    writer.del_item(wtxn, expected_dimension, docid)?;
+                    writers.get(deleted_index).unwrap().add_item(
+                        wtxn,
+                        expected_dimension,
+                        docid,
+                        &vector,
+                    )?;
                 }
             }
         }
@@ -757,8 +768,8 @@ pub(crate) fn write_typed_chunk_into_index(

             // overflow was detected during vector extraction.
             for writer in &writers {
-                if !writer.contains_item(wtxn, docid)? {
-                    writer.add_item(wtxn, docid, &vector)?;
+                if !writer.contains_item(wtxn, expected_dimension, docid)? {
+                    writer.add_item(wtxn, expected_dimension, docid, &vector)?;
                     break;
                 }
             }
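The "vectors are packed in the first writers" invariant used above means a document's i-th vector lives in the i-th arroy sub-index with no holes, so deletion can stop at the first sub-index that misses the document. A self-contained sketch of that early-exit loop, with `Vec<u32>` membership standing in for `contains_item`/`del_item`:

// Each inner Vec models one arroy sub-index for the same embedder.
fn del_all_vectors(stores: &[Vec<u32>], docid: u32) -> usize {
    let mut deleted = 0;
    for store in stores {
        if store.contains(&docid) {
            deleted += 1; // the real code calls writer.del_item(..)
        } else {
            break; // packed invariant: nothing can exist past the first miss
        }
    }
    deleted
}

fn main() {
    let stores = vec![vec![1, 2], vec![2], vec![]];
    assert_eq!(del_all_vectors(&stores, 2), 2);
    assert_eq!(del_all_vectors(&stores, 1), 1);
}
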
@@ -1,5 +1,6 @@
 use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
 use std::convert::TryInto;
+use std::num::NonZeroUsize;
 use std::result::Result as StdResult;
 use std::sync::Arc;

@@ -19,6 +20,7 @@ use crate::index::{
     IndexEmbeddingConfig, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
 };
 use crate::order_by_map::OrderByMap;
+use crate::prompt::default_max_bytes;
 use crate::proximity::ProximityPrecision;
 use crate::update::index_documents::IndexDocumentsMethod;
 use crate::update::{IndexDocuments, UpdateIndexingStep};
@@ -952,7 +954,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         let old_configs = self.index.embedding_configs(self.wtxn)?;
         let remove_all: Result<BTreeMap<String, EmbedderAction>> = old_configs
             .into_iter()
-            .map(|IndexEmbeddingConfig { name, config: _, user_provided }| -> Result<_> {
+            .map(|IndexEmbeddingConfig { name, config, user_provided }| -> Result<_> {
                 let embedder_id =
                     self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or(
                         crate::InternalError::DatabaseMissingEntry {
@@ -962,10 +964,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
                     )?;
                 Ok((
                     name,
-                    EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
-                        embedder_id,
-                        user_provided,
-                    }),
+                    EmbedderAction::with_write_back(
+                        WriteBackToDocuments { embedder_id, user_provided },
+                        config.quantized(),
+                    ),
                 ))
             })
             .collect();
@@ -1002,7 +1004,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
             match joined {
                 // updated config
                 EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => {
-                    let settings_diff = SettingsDiff::from_settings(old, new);
+                    let was_quantized = old.binary_quantized.set().unwrap_or_default();
+                    let settings_diff = SettingsDiff::from_settings(&name, old, new)?;
                     match settings_diff {
                         SettingsDiff::Remove => {
                             tracing::debug!(
@@ -1021,25 +1024,29 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
                             self.index.embedder_category_id.delete(self.wtxn, &name)?;
                             embedder_actions.insert(
                                 name,
-                                EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
-                                    embedder_id,
-                                    user_provided,
-                                }),
+                                EmbedderAction::with_write_back(
+                                    WriteBackToDocuments { embedder_id, user_provided },
+                                    was_quantized,
+                                ),
                             );
                         }
-                        SettingsDiff::Reindex { action, updated_settings } => {
+                        SettingsDiff::Reindex { action, updated_settings, quantize } => {
                             tracing::debug!(
                                 embedder = name,
                                 user_provided = user_provided.len(),
                                 ?action,
                                 "reindex embedder"
                             );
-                            embedder_actions.insert(name.clone(), EmbedderAction::Reindex(action));
+                            embedder_actions.insert(
+                                name.clone(),
+                                EmbedderAction::with_reindex(action, was_quantized)
+                                    .with_is_being_quantized(quantize),
+                            );
                             let new =
                                 validate_embedding_settings(Setting::Set(updated_settings), &name)?;
                             updated_configs.insert(name, (new, user_provided));
                         }
-                        SettingsDiff::UpdateWithoutReindex { updated_settings } => {
+                        SettingsDiff::UpdateWithoutReindex { updated_settings, quantize } => {
                             tracing::debug!(
                                 embedder = name,
                                 user_provided = user_provided.len(),
@@ -1047,6 +1054,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
                             );
                             let new =
                                 validate_embedding_settings(Setting::Set(updated_settings), &name)?;
+                            if quantize {
+                                embedder_actions.insert(
+                                    name.clone(),
+                                    EmbedderAction::default().with_is_being_quantized(true),
+                                );
+                            }
                             updated_configs.insert(name, (new, user_provided));
                         }
                     }
@@ -1065,8 +1078,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
                         &mut setting,
                     );
                     let setting = validate_embedding_settings(setting, &name)?;
-                    embedder_actions
-                        .insert(name.clone(), EmbedderAction::Reindex(ReindexAction::FullReindex));
+                    embedder_actions.insert(
+                        name.clone(),
+                        EmbedderAction::with_reindex(ReindexAction::FullReindex, false),
+                    );
                     updated_configs.insert(name, (setting, RoaringBitmap::new()));
                 }
             }
@@ -1080,19 +1095,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         let mut find_free_index =
             move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8);
         for (name, action) in embedder_actions.iter() {
-            match action {
-                EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => {
-                    /* cannot be a new embedder, so has to have an id already */
-                }
-                EmbedderAction::Reindex(ReindexAction::FullReindex) => {
-                    if self.index.embedder_category_id.get(self.wtxn, name)?.is_none() {
-                        let id = find_free_index()
-                            .ok_or(UserError::TooManyEmbedders(updated_configs.len()))?;
-                        tracing::debug!(embedder = name, id, "assigning free id to new embedder");
-                        self.index.embedder_category_id.put(self.wtxn, name, &id)?;
-                    }
-                }
-                EmbedderAction::WriteBackToDocuments(_) => { /* already removed */ }
+            // ignore actions that are not possible for a new embedder
+            if matches!(action.reindex(), Some(ReindexAction::FullReindex))
+                && self.index.embedder_category_id.get(self.wtxn, name)?.is_none()
+            {
+                let id =
+                    find_free_index().ok_or(UserError::TooManyEmbedders(updated_configs.len()))?;
+                tracing::debug!(embedder = name, id, "assigning free id to new embedder");
+                self.index.embedder_category_id.put(self.wtxn, name, &id)?;
             }
         }
         let updated_configs: Vec<IndexEmbeddingConfig> = updated_configs
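The hunk above gives a new embedder the first free category id: ids are small `u8` slots, and the scan takes the first slot not already in use, erroring out when every slot is taken. A self-contained sketch of the free-slot scan, with a plain bool array standing in for the occupancy map:

// `used[i]` is true when category id i is already assigned to an embedder.
fn find_free_index(used: &[bool]) -> Option<u8> {
    used.iter().enumerate().find(|(_, used)| !**used).map(|(index, _)| index as u8)
}

fn main() {
    let used = [true, true, false, true];
    assert_eq!(find_free_index(&used), Some(2)); // first free slot wins
    assert_eq!(find_free_index(&[true]), None); // no slot left: error upstream
}
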
@@ -1238,7 +1248,7 @@ impl InnerIndexSettingsDiff {
         old_settings: InnerIndexSettings,
         new_settings: InnerIndexSettings,
         primary_key_id: Option<FieldId>,
-        embedding_config_updates: BTreeMap<String, EmbedderAction>,
+        mut embedding_config_updates: BTreeMap<String, EmbedderAction>,
         settings_update_only: bool,
     ) -> Self {
         let only_additional_fields = match (
@@ -1273,6 +1283,39 @@ impl InnerIndexSettingsDiff {
         let cache_user_defined_searchables = old_settings.user_defined_searchable_fields
             != new_settings.user_defined_searchable_fields;

+        // if the user-defined searchables changed, then we need to reindex prompts.
+        if cache_user_defined_searchables {
+            for (embedder_name, (config, _, _quantized)) in
+                new_settings.embedding_configs.inner_as_ref()
+            {
+                let was_quantized =
+                    old_settings.embedding_configs.get(embedder_name).map_or(false, |conf| conf.2);
+                // skip embedders that don't use document templates
+                if !config.uses_document_template() {
+                    continue;
+                }
+
+                // note: this could currently be entry.or_insert(..), but we're future-proofing with an explicit match
+                // this always makes the code clearer by explicitly handling the cases
+                match embedding_config_updates.entry(embedder_name.clone()) {
+                    std::collections::btree_map::Entry::Vacant(entry) => {
+                        entry.insert(EmbedderAction::with_reindex(
+                            ReindexAction::RegeneratePrompts,
+                            was_quantized,
+                        ));
+                    }
+                    std::collections::btree_map::Entry::Occupied(entry) => {
+                        let EmbedderAction {
+                            was_quantized: _,
+                            is_being_quantized: _,
+                            write_back: _, // We are deleting this embedder, so no point in regeneration
+                            reindex: _, // We are already fully reindexing
+                        } = entry.get();
+                    }
+                };
+            }
+        }
+
         InnerIndexSettingsDiff {
             old: old_settings,
             new: new_settings,
@@ -1518,7 +1561,7 @@ fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingCo
         .map(
             |IndexEmbeddingConfig {
                  name,
-                 config: EmbeddingConfig { embedder_options, prompt },
+                 config: EmbeddingConfig { embedder_options, prompt, quantized },
                  ..
              }| {
                 let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?);
@@ -1528,7 +1571,7 @@ fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingCo
                         .map_err(crate::vector::Error::from)
                         .map_err(crate::Error::from)?,
                 );
-                Ok((name, (embedder, prompt)))
+                Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
             },
         )
         .collect();
@@ -1547,16 +1590,31 @@ fn validate_prompt(
             api_key,
             dimensions,
             document_template: Setting::Set(template),
+            document_template_max_bytes,
             url,
             request,
             response,
             distribution,
             headers,
+            binary_quantized: binary_quantize,
         }) => {
+            let max_bytes = match document_template_max_bytes.set() {
+                Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| {
+                    crate::error::UserError::InvalidSettingsDocumentTemplateMaxBytes {
+                        embedder_name: name.to_owned(),
+                    }
+                })?,
+                None => default_max_bytes(),
+            };

             // validate
-            let template = crate::prompt::Prompt::new(template)
-                .map(|prompt| crate::prompt::PromptData::from(prompt).template)
-                .map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?;
+            let template = crate::prompt::Prompt::new(
+                template,
+                // always specify a max_bytes
+                Some(max_bytes),
+            )
+            .map(|prompt| crate::prompt::PromptData::from(prompt).template)
+            .map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?;

             Ok(Setting::Set(EmbeddingSettings {
                 source,
@@ -1565,11 +1623,13 @@ fn validate_prompt(
                 api_key,
                 dimensions,
                 document_template: Setting::Set(template),
+                document_template_max_bytes,
                 url,
                 request,
                 response,
                 distribution,
                 headers,
+                binary_quantized: binary_quantize,
             }))
         }
         new => Ok(new),
@@ -1589,11 +1649,13 @@ pub fn validate_embedding_settings(
         api_key,
         dimensions,
        document_template,
+        document_template_max_bytes,
         url,
         request,
         response,
         distribution,
         headers,
+        binary_quantized: binary_quantize,
     } = settings;

     if let Some(0) = dimensions.set() {
@@ -1628,11 +1690,13 @@ pub fn validate_embedding_settings(
             api_key,
             dimensions,
             document_template,
+            document_template_max_bytes,
             url,
             request,
             response,
             distribution,
             headers,
+            binary_quantized: binary_quantize,
         }));
     };
     match inferred_source {
@@ -1700,6 +1764,12 @@ pub fn validate_embedding_settings(
                 inferred_source,
                 name,
             )?;
+            check_unset(
+                &document_template_max_bytes,
+                EmbeddingSettings::DOCUMENT_TEMPLATE_MAX_BYTES,
+                inferred_source,
+                name,
+            )?;
             check_set(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?;

             check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
@@ -1722,11 +1792,13 @@ pub fn validate_embedding_settings(
             api_key,
             dimensions,
             document_template,
+            document_template_max_bytes,
             url,
             request,
             response,
             distribution,
             headers,
+            binary_quantized: binary_quantize,
         }))
     }

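The validation added above rejects a `documentTemplateMaxBytes` of zero by funneling the user value through `NonZeroUsize`, falling back to a default when the setting is absent. A minimal sketch of that shape; the error type and the default of 400 bytes are local stand-ins, not the crate's definitions:

use std::num::NonZeroUsize;

#[derive(Debug)]
struct InvalidMaxBytes; // stand-in for the real UserError variant

fn resolve_max_bytes(user: Option<usize>) -> Result<NonZeroUsize, InvalidMaxBytes> {
    match user {
        // a user-supplied 0 cannot be a valid truncation limit
        Some(bytes) => NonZeroUsize::new(bytes).ok_or(InvalidMaxBytes),
        // assumed default size when the setting is not provided
        None => Ok(NonZeroUsize::new(400).unwrap()),
    }
}

fn main() {
    assert!(resolve_max_bytes(Some(0)).is_err());
    assert_eq!(resolve_max_bytes(None).unwrap().get(), 400);
}
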
@@ -1,8 +1,12 @@
 use std::collections::HashMap;
 use std::sync::Arc;

+use arroy::distances::{Angular, BinaryQuantizedAngular};
+use arroy::ItemId;
 use deserr::{DeserializeError, Deserr};
+use heed::{RoTxn, RwTxn, Unspecified};
 use ordered_float::OrderedFloat;
+use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize};

 use self::error::{EmbedError, NewEmbedderError};
@@ -26,6 +30,171 @@ pub type Embedding = Vec<f32>;

 pub const REQUEST_PARALLELISM: usize = 40;

+pub struct ArroyWrapper {
+    quantized: bool,
+    index: u16,
+    database: arroy::Database<Unspecified>,
+}
+
+impl ArroyWrapper {
+    pub fn new(database: arroy::Database<Unspecified>, index: u16, quantized: bool) -> Self {
+        Self { database, index, quantized }
+    }
+
+    pub fn index(&self) -> u16 {
+        self.index
+    }
+
+    pub fn dimensions(&self, rtxn: &RoTxn) -> Result<usize, arroy::Error> {
+        if self.quantized {
+            Ok(arroy::Reader::open(rtxn, self.index, self.quantized_db())?.dimensions())
+        } else {
+            Ok(arroy::Reader::open(rtxn, self.index, self.angular_db())?.dimensions())
+        }
+    }
+
+    pub fn quantize(
+        &mut self,
+        wtxn: &mut RwTxn,
+        index: u16,
+        dimension: usize,
+    ) -> Result<(), arroy::Error> {
+        if !self.quantized {
+            let writer = arroy::Writer::new(self.angular_db(), index, dimension);
+            writer.prepare_changing_distance::<BinaryQuantizedAngular>(wtxn)?;
+            self.quantized = true;
+        }
+        Ok(())
+    }
+
+    pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
+        if self.quantized {
+            arroy::Writer::new(self.quantized_db(), self.index, dimension).need_build(rtxn)
+        } else {
+            arroy::Writer::new(self.angular_db(), self.index, dimension).need_build(rtxn)
+        }
+    }
+
+    pub fn build<R: rand::Rng + rand::SeedableRng>(
+        &self,
+        wtxn: &mut RwTxn,
+        rng: &mut R,
+        dimension: usize,
+    ) -> Result<(), arroy::Error> {
+        if self.quantized {
+            arroy::Writer::new(self.quantized_db(), self.index, dimension).build(wtxn, rng, None)
+        } else {
+            arroy::Writer::new(self.angular_db(), self.index, dimension).build(wtxn, rng, None)
+        }
+    }
+
+    pub fn add_item(
+        &self,
+        wtxn: &mut RwTxn,
+        dimension: usize,
+        item_id: arroy::ItemId,
+        vector: &[f32],
+    ) -> Result<(), arroy::Error> {
+        if self.quantized {
+            arroy::Writer::new(self.quantized_db(), self.index, dimension)
+                .add_item(wtxn, item_id, vector)
+        } else {
+            arroy::Writer::new(self.angular_db(), self.index, dimension)
+                .add_item(wtxn, item_id, vector)
+        }
+    }
+
+    pub fn del_item(
+        &self,
+        wtxn: &mut RwTxn,
+        dimension: usize,
+        item_id: arroy::ItemId,
+    ) -> Result<bool, arroy::Error> {
+        if self.quantized {
+            arroy::Writer::new(self.quantized_db(), self.index, dimension).del_item(wtxn, item_id)
+        } else {
+            arroy::Writer::new(self.angular_db(), self.index, dimension).del_item(wtxn, item_id)
+        }
+    }
+
+    pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
+        if self.quantized {
+            arroy::Writer::new(self.quantized_db(), self.index, dimension).clear(wtxn)
+        } else {
+            arroy::Writer::new(self.angular_db(), self.index, dimension).clear(wtxn)
+        }
+    }
+
+    pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
+        if self.quantized {
+            arroy::Writer::new(self.quantized_db(), self.index, dimension).is_empty(rtxn)
+        } else {
+            arroy::Writer::new(self.angular_db(), self.index, dimension).is_empty(rtxn)
+        }
+    }
+
+    pub fn contains_item(
+        &self,
+        rtxn: &RoTxn,
+        dimension: usize,
+        item: arroy::ItemId,
+    ) -> Result<bool, arroy::Error> {
+        if self.quantized {
+            arroy::Writer::new(self.quantized_db(), self.index, dimension).contains_item(rtxn, item)
+        } else {
+            arroy::Writer::new(self.angular_db(), self.index, dimension).contains_item(rtxn, item)
+        }
+    }
+
+    pub fn nns_by_item(
+        &self,
+        rtxn: &RoTxn,
+        item: ItemId,
+        limit: usize,
+        filter: Option<&RoaringBitmap>,
+    ) -> Result<Option<Vec<(ItemId, f32)>>, arroy::Error> {
+        if self.quantized {
+            arroy::Reader::open(rtxn, self.index, self.quantized_db())?
+                .nns_by_item(rtxn, item, limit, None, None, filter)
+        } else {
+            arroy::Reader::open(rtxn, self.index, self.angular_db())?
+                .nns_by_item(rtxn, item, limit, None, None, filter)
+        }
+    }
+
+    pub fn nns_by_vector(
+        &self,
+        txn: &RoTxn,
+        item: &[f32],
+        limit: usize,
+        filter: Option<&RoaringBitmap>,
+    ) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
+        if self.quantized {
+            arroy::Reader::open(txn, self.index, self.quantized_db())?
+                .nns_by_vector(txn, item, limit, None, None, filter)
+        } else {
+            arroy::Reader::open(txn, self.index, self.angular_db())?
+                .nns_by_vector(txn, item, limit, None, None, filter)
+        }
+    }
+
+    pub fn item_vector(&self, rtxn: &RoTxn, docid: u32) -> Result<Option<Vec<f32>>, arroy::Error> {
+        if self.quantized {
+            arroy::Reader::open(rtxn, self.index, self.quantized_db())?.item_vector(rtxn, docid)
+        } else {
+            arroy::Reader::open(rtxn, self.index, self.angular_db())?.item_vector(rtxn, docid)
+        }
+    }
+
+    fn angular_db(&self) -> arroy::Database<Angular> {
+        self.database.remap_data_type()
+    }
+
+    fn quantized_db(&self) -> arroy::Database<BinaryQuantizedAngular> {
+        self.database.remap_data_type()
+    }
+}
+
 /// One or multiple embeddings stored consecutively in a flat vector.
 pub struct Embeddings<F> {
     data: Vec<F>,
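The `ArroyWrapper` added above erases the distance type behind a runtime `quantized` flag: the database handle is stored untyped (`Unspecified`) and remapped to a concrete distance at each call site. A self-contained sketch of that pattern with stand-in types (`Db<D>` plays the role of `arroy::Database<D>`; the remap is zero-cost because only the type parameter changes):

use std::marker::PhantomData;

struct Unspecified;
struct Angular;
struct BinaryQuantizedAngular;

struct Db<D> {
    _marker: PhantomData<D>,
}

impl<D> Db<D> {
    // stand-in for remap_data_type(): reinterpret the same handle
    fn remap<T>(&self) -> Db<T> {
        Db { _marker: PhantomData }
    }
}

struct Wrapper {
    quantized: bool,
    database: Db<Unspecified>,
}

impl Wrapper {
    fn search(&self) -> &'static str {
        if self.quantized {
            let _db: Db<BinaryQuantizedAngular> = self.database.remap();
            "searched with binary quantized (hamming-style) distance"
        } else {
            let _db: Db<Angular> = self.database.remap();
            "searched with angular (cosine) distance"
        }
    }
}

fn main() {
    let wrapper = Wrapper { quantized: true, database: Db { _marker: PhantomData } };
    println!("{}", wrapper.search());
}
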
@@ -124,62 +293,48 @@ pub struct EmbeddingConfig {
     pub embedder_options: EmbedderOptions,
     /// Document template
     pub prompt: PromptData,
+    /// If this embedder is binary quantized
+    pub quantized: Option<bool>,
     // TODO: add metrics and anything needed
 }

+impl EmbeddingConfig {
+    pub fn quantized(&self) -> bool {
+        self.quantized.unwrap_or_default()
+    }
+}
+
 /// Map of embedder configurations.
 ///
 /// Each configuration is mapped to a name.
 #[derive(Clone, Default)]
-pub struct EmbeddingConfigs(HashMap<String, (Arc<Embedder>, Arc<Prompt>)>);
+pub struct EmbeddingConfigs(HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)>);

 impl EmbeddingConfigs {
     /// Create the map from its internal component.s
-    pub fn new(data: HashMap<String, (Arc<Embedder>, Arc<Prompt>)>) -> Self {
+    pub fn new(data: HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)>) -> Self {
         Self(data)
     }

     /// Get an embedder configuration and template from its name.
-    pub fn get(&self, name: &str) -> Option<(Arc<Embedder>, Arc<Prompt>)> {
+    pub fn get(&self, name: &str) -> Option<(Arc<Embedder>, Arc<Prompt>, bool)> {
         self.0.get(name).cloned()
     }

-    /// Get the default embedder configuration, if any.
-    pub fn get_default(&self) -> Option<(Arc<Embedder>, Arc<Prompt>)> {
-        self.get(self.get_default_embedder_name())
-    }
-
-    pub fn inner_as_ref(&self) -> &HashMap<String, (Arc<Embedder>, Arc<Prompt>)> {
+    pub fn inner_as_ref(&self) -> &HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)> {
         &self.0
     }

-    pub fn into_inner(self) -> HashMap<String, (Arc<Embedder>, Arc<Prompt>)> {
+    pub fn into_inner(self) -> HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)> {
         self.0
     }

-    /// Get the name of the default embedder configuration.
-    ///
-    /// The default embedder is determined as follows:
-    ///
-    /// - If there is only one embedder, it is always the default.
-    /// - If there are multiple embedders and one of them is called `default`, then that one is the default embedder.
-    /// - In all other cases, there is no default embedder.
-    pub fn get_default_embedder_name(&self) -> &str {
-        let mut it = self.0.keys();
-        let first_name = it.next();
-        let second_name = it.next();
-        match (first_name, second_name) {
-            (None, _) => "default",
-            (Some(first), None) => first,
-            (Some(_), Some(_)) => "default",
-        }
-    }
 }

 impl IntoIterator for EmbeddingConfigs {
-    type Item = (String, (Arc<Embedder>, Arc<Prompt>));
+    type Item = (String, (Arc<Embedder>, Arc<Prompt>, bool));

-    type IntoIter = std::collections::hash_map::IntoIter<String, (Arc<Embedder>, Arc<Prompt>)>;
+    type IntoIter =
+        std::collections::hash_map::IntoIter<String, (Arc<Embedder>, Arc<Prompt>, bool)>;

     fn into_iter(self) -> Self::IntoIter {
         self.0.into_iter()
@@ -305,6 +460,16 @@ impl Embedder {
             Embedder::Rest(embedder) => embedder.distribution(),
         }
     }
+
+    pub fn uses_document_template(&self) -> bool {
+        match self {
+            Embedder::HuggingFace(_)
+            | Embedder::OpenAi(_)
+            | Embedder::Ollama(_)
+            | Embedder::Rest(_) => true,
+            Embedder::UserProvided(_) => false,
+        }
+    }
 }

 /// Describes the mean and sigma of distribution of embedding similarity in the embedding space.
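`uses_document_template` above gates prompt regeneration: user-provided embedders never render a template, so changing the searchable fields must not schedule a reindex for them. A self-contained sketch of that gate, with the embedder enum reduced to the two relevant cases:

enum Embedder {
    Rest,         // renders a document template before embedding
    UserProvided, // vectors come with the documents, no template involved
}

impl Embedder {
    fn uses_document_template(&self) -> bool {
        !matches!(self, Embedder::UserProvided)
    }
}

fn needs_prompt_reindex(embedder: &Embedder, searchables_changed: bool) -> bool {
    searchables_changed && embedder.uses_document_template()
}

fn main() {
    assert!(needs_prompt_reindex(&Embedder::Rest, true));
    assert!(!needs_prompt_reindex(&Embedder::UserProvided, true));
}
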
@@ -66,11 +66,11 @@ pub enum EmbeddingModel {
     // # WARNING
     //
     // If ever adding a model, make sure to add it to the list of supported models below.
-    #[default]
     #[serde(rename = "text-embedding-ada-002")]
     #[deserr(rename = "text-embedding-ada-002")]
     TextEmbeddingAda002,

+    #[default]
     #[serde(rename = "text-embedding-3-small")]
     #[deserr(rename = "text-embedding-3-small")]
     TextEmbedding3Small,
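Moving the `#[default]` attribute above changes which variant `EmbeddingModel::default()` returns, so newly configured OpenAI embedders fall back to `text-embedding-3-small` instead of `ada-002`. A minimal illustration of the mechanism (stable Rust since 1.62):

#[derive(Debug, Default, PartialEq)]
enum EmbeddingModel {
    TextEmbeddingAda002,
    #[default] // picks the variant returned by default()
    TextEmbedding3Small,
}

fn main() {
    assert_eq!(EmbeddingModel::default(), EmbeddingModel::TextEmbedding3Small);
}
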
@@ -1,11 +1,12 @@
 use std::collections::BTreeMap;
+use std::num::NonZeroUsize;

 use deserr::Deserr;
 use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize};

 use super::{ollama, openai, DistributionShift};
-use crate::prompt::PromptData;
+use crate::prompt::{default_max_bytes, PromptData};
 use crate::update::Setting;
 use crate::vector::EmbeddingConfig;
 use crate::UserError;
@@ -31,9 +32,15 @@ pub struct EmbeddingSettings {
     pub dimensions: Setting<usize>,
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default)]
+    pub binary_quantized: Setting<bool>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
     pub document_template: Setting<String>,
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default)]
+    pub document_template_max_bytes: Setting<usize>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
     pub url: Setting<String>,
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default)]
@@ -81,23 +88,63 @@ pub enum ReindexAction {

 pub enum SettingsDiff {
     Remove,
-    Reindex { action: ReindexAction, updated_settings: EmbeddingSettings },
-    UpdateWithoutReindex { updated_settings: EmbeddingSettings },
+    Reindex { action: ReindexAction, updated_settings: EmbeddingSettings, quantize: bool },
+    UpdateWithoutReindex { updated_settings: EmbeddingSettings, quantize: bool },
 }

-pub enum EmbedderAction {
-    WriteBackToDocuments(WriteBackToDocuments),
-    Reindex(ReindexAction),
+#[derive(Default, Debug)]
+pub struct EmbedderAction {
+    pub was_quantized: bool,
+    pub is_being_quantized: bool,
+    pub write_back: Option<WriteBackToDocuments>,
+    pub reindex: Option<ReindexAction>,
 }

+impl EmbedderAction {
+    pub fn is_being_quantized(&self) -> bool {
+        self.is_being_quantized
+    }
+
+    pub fn write_back(&self) -> Option<&WriteBackToDocuments> {
+        self.write_back.as_ref()
+    }
+
+    pub fn reindex(&self) -> Option<&ReindexAction> {
+        self.reindex.as_ref()
+    }
+
+    pub fn with_is_being_quantized(mut self, quantize: bool) -> Self {
+        self.is_being_quantized = quantize;
+        self
+    }
+
+    pub fn with_write_back(write_back: WriteBackToDocuments, was_quantized: bool) -> Self {
+        Self {
+            was_quantized,
+            is_being_quantized: false,
+            write_back: Some(write_back),
+            reindex: None,
+        }
+    }
+
+    pub fn with_reindex(reindex: ReindexAction, was_quantized: bool) -> Self {
+        Self { was_quantized, is_being_quantized: false, write_back: None, reindex: Some(reindex) }
+    }
+}
+
+#[derive(Debug)]
 pub struct WriteBackToDocuments {
     pub embedder_id: u8,
     pub user_provided: RoaringBitmap,
 }

 impl SettingsDiff {
-    pub fn from_settings(old: EmbeddingSettings, new: Setting<EmbeddingSettings>) -> Self {
-        match new {
+    pub fn from_settings(
+        embedder_name: &str,
+        old: EmbeddingSettings,
+        new: Setting<EmbeddingSettings>,
+    ) -> Result<Self, UserError> {
+        let ret = match new {
             Setting::Set(new) => {
                 let EmbeddingSettings {
                     mut source,
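The hunks above replace the `EmbedderAction` enum with a struct: the old variants were mutually exclusive, while quantization state is orthogonal to both write-back and reindex, so an action can now carry several flags at once and is assembled fluently. A hedged usage sketch, with the types reduced to the fields relevant here:

#[derive(Default, Debug)]
struct EmbedderAction {
    was_quantized: bool,
    is_being_quantized: bool,
    reindex: Option<&'static str>, // the real field holds a ReindexAction
}

impl EmbedderAction {
    fn with_reindex(reindex: &'static str, was_quantized: bool) -> Self {
        Self { was_quantized, is_being_quantized: false, reindex: Some(reindex) }
    }
    fn with_is_being_quantized(mut self, quantize: bool) -> Self {
        self.is_being_quantized = quantize;
        self
    }
}

fn main() {
    // One action expressing both "full reindex" and "enable quantization":
    let action =
        EmbedderAction::with_reindex("FullReindex", false).with_is_being_quantized(true);
    assert!(action.is_being_quantized);
    assert!(action.reindex.is_some());
}
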
@@ -111,6 +158,8 @@ impl SettingsDiff {
                     mut response,
                     mut distribution,
                     mut headers,
+                    mut document_template_max_bytes,
+                    binary_quantized: mut binary_quantize,
                 } = old;

                 let EmbeddingSettings {
@@ -125,8 +174,18 @@ impl SettingsDiff {
                     response: new_response,
                     distribution: new_distribution,
                     headers: new_headers,
+                    document_template_max_bytes: new_document_template_max_bytes,
+                    binary_quantized: new_binary_quantize,
                 } = new;

+                if matches!(binary_quantize, Setting::Set(true))
+                    && matches!(new_binary_quantize, Setting::Set(false))
+                {
+                    return Err(UserError::InvalidDisableBinaryQuantization {
+                        embedder_name: embedder_name.to_string(),
+                    });
+                }
+
                 let mut reindex_action = None;

                 // **Warning**: do not use short-circuiting || here, we want all these operations applied
@@ -142,6 +201,7 @@ impl SettingsDiff {
                         &mut request,
                         &mut response,
                         &mut document_template,
+                        &mut document_template_max_bytes,
                         &mut headers,
                     )
                 }
@@ -165,6 +225,7 @@ impl SettingsDiff {
                     _ => {}
                 }
             }
+            let binary_quantize_changed = binary_quantize.apply(new_binary_quantize);
             if url.apply(new_url) {
                 match source {
                     // do not regenerate on an url change in OpenAI
@@ -190,6 +251,23 @@ impl SettingsDiff {
                 );
             }

+            if document_template_max_bytes.apply(new_document_template_max_bytes) {
+                let previous_document_template_max_bytes =
+                    document_template_max_bytes.set().unwrap_or(default_max_bytes().get());
+                let new_document_template_max_bytes =
+                    new_document_template_max_bytes.set().unwrap_or(default_max_bytes().get());
+
+                // only reindex if the size increased. Reasoning:
+                // - size decrease is a performance optimization, so we don't reindex and we keep the more accurate vectors
+                // - size increase is an accuracy optimization, so we want to reindex
+                if new_document_template_max_bytes > previous_document_template_max_bytes {
+                    ReindexAction::push_action(
+                        &mut reindex_action,
+                        ReindexAction::RegeneratePrompts,
+                    )
+                }
+            }
+
             distribution.apply(new_distribution);
             api_key.apply(new_api_key);
             headers.apply(new_headers);
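The max-bytes diff above is deliberately asymmetric: shrinking the limit only truncates harder, so the existing (more complete) prompts and vectors are kept; growing it can capture text the old prompts missed, so only that direction schedules a `RegeneratePrompts` reindex. A minimal sketch of the rule; the default of 400 bytes is an assumed stand-in for `default_max_bytes()`:

const DEFAULT_MAX_BYTES: usize = 400; // assumed default

fn must_regenerate(previous: Option<usize>, new: Option<usize>) -> bool {
    let previous = previous.unwrap_or(DEFAULT_MAX_BYTES);
    let new = new.unwrap_or(DEFAULT_MAX_BYTES);
    new > previous
}

fn main() {
    assert!(must_regenerate(Some(400), Some(1000))); // limit grew: reindex
    assert!(!must_regenerate(Some(1000), Some(400))); // limit shrank: keep vectors
}
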
@ -206,16 +284,28 @@ impl SettingsDiff {
|
|||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
|
document_template_max_bytes,
|
||||||
|
binary_quantized: binary_quantize,
|
||||||
};
|
};
|
||||||
|
|
||||||
match reindex_action {
|
match reindex_action {
|
||||||
Some(action) => Self::Reindex { action, updated_settings },
|
Some(action) => Self::Reindex {
|
||||||
None => Self::UpdateWithoutReindex { updated_settings },
|
action,
|
||||||
|
updated_settings,
|
||||||
|
quantize: binary_quantize_changed,
|
||||||
|
},
|
||||||
|
None => Self::UpdateWithoutReindex {
|
||||||
|
updated_settings,
|
||||||
|
quantize: binary_quantize_changed,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Setting::Reset => Self::Remove,
|
Setting::Reset => Self::Remove,
|
||||||
Setting::NotSet => Self::UpdateWithoutReindex { updated_settings: old },
|
Setting::NotSet => {
|
||||||
}
|
Self::UpdateWithoutReindex { updated_settings: old, quantize: false }
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(ret)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -239,6 +329,7 @@ fn apply_default_for_source(
|
|||||||
request: &mut Setting<serde_json::Value>,
|
request: &mut Setting<serde_json::Value>,
|
||||||
response: &mut Setting<serde_json::Value>,
|
response: &mut Setting<serde_json::Value>,
|
||||||
document_template: &mut Setting<String>,
|
document_template: &mut Setting<String>,
|
||||||
|
document_template_max_bytes: &mut Setting<usize>,
|
||||||
headers: &mut Setting<BTreeMap<String, String>>,
|
headers: &mut Setting<BTreeMap<String, String>>,
|
||||||
) {
|
) {
|
||||||
match source {
|
match source {
|
||||||
@ -286,6 +377,7 @@ fn apply_default_for_source(
|
|||||||
*request = Setting::NotSet;
|
*request = Setting::NotSet;
|
||||||
*response = Setting::NotSet;
|
*response = Setting::NotSet;
|
||||||
*document_template = Setting::NotSet;
|
*document_template = Setting::NotSet;
|
||||||
|
*document_template_max_bytes = Setting::NotSet;
|
||||||
*headers = Setting::NotSet;
|
*headers = Setting::NotSet;
|
||||||
}
|
}
|
||||||
Setting::NotSet => {}
|
Setting::NotSet => {}
|
||||||
@ -316,6 +408,7 @@ impl EmbeddingSettings {
|
|||||||
pub const API_KEY: &'static str = "apiKey";
|
pub const API_KEY: &'static str = "apiKey";
|
||||||
pub const DIMENSIONS: &'static str = "dimensions";
|
pub const DIMENSIONS: &'static str = "dimensions";
|
||||||
pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate";
|
pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate";
|
||||||
|
pub const DOCUMENT_TEMPLATE_MAX_BYTES: &'static str = "documentTemplateMaxBytes";
|
||||||
|
|
||||||
pub const URL: &'static str = "url";
|
pub const URL: &'static str = "url";
|
||||||
pub const REQUEST: &'static str = "request";
|
pub const REQUEST: &'static str = "request";
|
||||||
@ -458,7 +551,9 @@ impl std::fmt::Display for EmbedderSource {
|
|||||||
|
|
||||||
impl From<EmbeddingConfig> for EmbeddingSettings {
|
impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||||
fn from(value: EmbeddingConfig) -> Self {
|
fn from(value: EmbeddingConfig) -> Self {
|
||||||
let EmbeddingConfig { embedder_options, prompt } = value;
|
let EmbeddingConfig { embedder_options, prompt, quantized } = value;
|
||||||
|
let document_template_max_bytes =
|
||||||
|
Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get());
|
||||||
match embedder_options {
|
match embedder_options {
|
||||||
super::EmbedderOptions::HuggingFace(super::hf::EmbedderOptions {
|
super::EmbedderOptions::HuggingFace(super::hf::EmbedderOptions {
|
||||||
model,
|
model,
|
||||||
@ -471,11 +566,13 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
api_key: Setting::NotSet,
|
api_key: Setting::NotSet,
|
||||||
dimensions: Setting::NotSet,
|
dimensions: Setting::NotSet,
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
|
document_template_max_bytes,
|
||||||
url: Setting::NotSet,
|
url: Setting::NotSet,
|
||||||
request: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
headers: Setting::NotSet,
|
headers: Setting::NotSet,
|
||||||
distribution: Setting::some_or_not_set(distribution),
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
|
binary_quantized: Setting::some_or_not_set(quantized),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
|
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
|
||||||
url,
|
url,
|
||||||
@ -490,11 +587,13 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
                 api_key: Setting::some_or_not_set(api_key),
                 dimensions: Setting::some_or_not_set(dimensions),
                 document_template: Setting::Set(prompt.template),
+                document_template_max_bytes,
                 url: Setting::some_or_not_set(url),
                 request: Setting::NotSet,
                 response: Setting::NotSet,
                 headers: Setting::NotSet,
                 distribution: Setting::some_or_not_set(distribution),
+                binary_quantized: Setting::some_or_not_set(quantized),
             },
             super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
                 embedding_model,
@ -509,11 +608,13 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
                 api_key: Setting::some_or_not_set(api_key),
                 dimensions: Setting::some_or_not_set(dimensions),
                 document_template: Setting::Set(prompt.template),
+                document_template_max_bytes,
                 url: Setting::some_or_not_set(url),
                 request: Setting::NotSet,
                 response: Setting::NotSet,
                 headers: Setting::NotSet,
                 distribution: Setting::some_or_not_set(distribution),
+                binary_quantized: Setting::some_or_not_set(quantized),
             },
             super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
                 dimensions,
@ -525,11 +626,13 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
                 api_key: Setting::NotSet,
                 dimensions: Setting::Set(dimensions),
                 document_template: Setting::NotSet,
+                document_template_max_bytes: Setting::NotSet,
                 url: Setting::NotSet,
                 request: Setting::NotSet,
                 response: Setting::NotSet,
                 headers: Setting::NotSet,
                 distribution: Setting::some_or_not_set(distribution),
+                binary_quantized: Setting::some_or_not_set(quantized),
             },
             super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
                 api_key,
@ -546,11 +649,13 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
                 api_key: Setting::some_or_not_set(api_key),
                 dimensions: Setting::some_or_not_set(dimensions),
                 document_template: Setting::Set(prompt.template),
+                document_template_max_bytes,
                 url: Setting::Set(url),
                 request: Setting::Set(request),
                 response: Setting::Set(response),
                 distribution: Setting::some_or_not_set(distribution),
                 headers: Setting::Set(headers),
+                binary_quantized: Setting::some_or_not_set(quantized),
             },
         }
     }
@ -566,13 +671,17 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
             api_key,
             dimensions,
             document_template,
+            document_template_max_bytes,
             url,
             request,
             response,
             distribution,
             headers,
+            binary_quantized,
         } = value;
 
+        this.quantized = binary_quantized.set();
+
         if let Some(source) = source.set() {
             match source {
                 EmbedderSource::OpenAi => {
@ -648,7 +757,12 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
         }
 
         if let Setting::Set(template) = document_template {
-            this.prompt = PromptData { template }
+            let max_bytes = document_template_max_bytes
+                .set()
+                .and_then(NonZeroUsize::new)
+                .unwrap_or(default_max_bytes());
+
+            this.prompt = PromptData { template, max_bytes: Some(max_bytes) }
         }
 
         this
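Taken together, the hunks above thread one defaulting rule through both conversion directions: when stored config is turned into user-facing settings, a missing `prompt.max_bytes` surfaces as the default rather than as `NotSet`; when settings are turned back into config, an unset or zero `documentTemplateMaxBytes` falls back to the same default, so `PromptData.max_bytes` always ends up `Some(_)`. Below is a minimal, self-contained sketch of that round-trip. The `Setting` enum, `some_or_not_set`, and the 400-byte `default_max_bytes()` here are simplified stand-ins modeled on what the patch uses, not the crate's actual definitions.

use std::num::NonZeroUsize;

// Stand-in for the crate's `Setting` type (illustrative only).
#[derive(Debug, Clone, Copy, PartialEq)]
enum Setting<T> {
    Set(T),
    NotSet,
}

impl<T> Setting<T> {
    // Returns the inner value if one was explicitly set.
    fn set(self) -> Option<T> {
        match self {
            Setting::Set(v) => Some(v),
            Setting::NotSet => None,
        }
    }

    // Maps `Some` to `Set` and `None` to `NotSet`, as the patch does for
    // `distribution` and `binary_quantized`.
    fn some_or_not_set(value: Option<T>) -> Self {
        match value {
            Some(v) => Setting::Set(v),
            None => Setting::NotSet,
        }
    }
}

// The patch calls a `default_max_bytes()` helper; 400 is an assumed value.
fn default_max_bytes() -> NonZeroUsize {
    NonZeroUsize::new(400).unwrap()
}

// Settings -> config direction: an unset or zero user value falls back to
// the default, so the stored prompt always carries a concrete limit.
fn max_bytes_for_prompt(document_template_max_bytes: Setting<usize>) -> NonZeroUsize {
    document_template_max_bytes
        .set()
        .and_then(NonZeroUsize::new)
        .unwrap_or(default_max_bytes())
}

// Config -> settings direction: a missing stored limit surfaces as the
// default rather than as `NotSet`.
fn max_bytes_for_settings(stored: Option<NonZeroUsize>) -> Setting<usize> {
    Setting::Set(stored.unwrap_or(default_max_bytes()).get())
}

fn main() {
    assert_eq!(max_bytes_for_prompt(Setting::Set(512)).get(), 512);
    assert_eq!(max_bytes_for_prompt(Setting::Set(0)), default_max_bytes());
    assert_eq!(max_bytes_for_prompt(Setting::NotSet), default_max_bytes());
    assert_eq!(max_bytes_for_settings(None), Setting::Set(400));
    assert_eq!(Setting::some_or_not_set(Some(true)), Setting::Set(true));
}

Because `max_bytes_for_settings` never returns `NotSet`, a config round-tripped through the settings view keeps a concrete byte limit, which is what the `@ -648,7 +757,12 @@` hunk relies on when it rebuilds `PromptData`.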