mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-12 06:24:29 +01:00
Merge #4815
4815: Rest embedder api mk2 r=ManyTheFish a=dureuill # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/4756 - [x] [REST API parameter names and behavior are unclear](https://github.com/meilisearch/documentation/pull/2824#issuecomment-2124073720) - unclear names are removed. There remain only two parameters: `request`, a template of what Meilisearch's request to the embedding server should be, and `response`, a template of what the embedding server's response to Meilisearch should look like - [x] [Bad error message or bad default value when we don't specify the `query` parameter](85d8455c11/meilisearch/tests/vector/rest.rs (L105-L140)
) - The replacement for `query`, which is `request`, is now a mandatory parameter. Omitting it will result in the following error message: "`.embedders.rest`: Missing field `request` (note: this field is mandatory for source rest)", which is clear - [x] [Bad error message when both `pathToEmbeddings` and `embeddingObject` are missing](2141cb3b69/meilisearch/tests/vector/rest.rs (L142-L178)
) - These parameters no longer exist. Now, the point of extraction is given directly by the location of an `{{embedding}}` placeholder in the `response` parameter. - [x] [Unexpected error when we don't specify both `pathToEmbeddings` and `embeddingObject` (only once should be required)](2141cb3b69/meilisearch/tests/vector/rest.rs (L180-L260)
) - These parameters no longer exist. Now, the point of extraction is given directly by the location of an `{{embedding}}` placeholder in the `response` parameter. - [x] [Should not panic when the dimensions specified do not work with the model](2141cb3b69/meilisearch/tests/vector/rest.rs (L262-L299)
) - This no longer panics, and instead returns "While embedding documents for embedder `rest`: runtime error: was expecting embeddings of dimension `2`, got embeddings of dimensions `3`" - [x] [Be more flexible on the type of data that is accepted](https://github.com/meilisearch/meilisearch/issues/4757#issuecomment-2201948531) - [x] Always accept arrays of embeddings even if `inputType` is set to `text` - This is controlled by the repeat placeholder `"{..}"`; an array of embeddings can be configured even if the input is not in an array. - [x] Accept arrays of results at the root level and texts/arrays of texts at the root level. - doable with `request: "{{text}}"` and `response: "{{embedding}}"` or `response: ["{{embedding}}"]` (see test `vector::rest::server_raw`) ## What does this PR do? - [See public usage](https://meilisearch.notion.site/v1-10-AI-search-changes-737c9d7d010d4dd685582bf5dab579e2#8de842673ffa4a139210094a89c1ec3e) - Add new `milli::vector::json_template` module to parse JSON templates with an injection placeholder and a repeat placeholder - Change rest embedder to use two JSON templates - Change ollama and openai embedders to use the new rest embedder - Update settings - Update and add tests ## Breaking change > [!CAUTION] > This PR is a breaking change to the REST embedder. > Importing a dump containing a REST embedder configuration will fail in v1.10 with an error: "Error: unknown field `query`, expected one of `source`, `model`, `revision`, `apiKey`, `dimensions`, `documentTemplate`, `url`, `request`, `response`, `distribution` at line 1 column 752". Upgrade procedure: 1. Remove any embedder with source "rest" 2. Create a dump 3. Import that dump into a v1.10 instance 4. Re-add any removed embedder, using the new settings. Co-authored-by: Louis Dureuil <louis@meilisearch.com> Co-authored-by: Louis Dureuil <louis.dureuil@xinra.net> Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
c26bd68de5
82
Cargo.lock
generated
82
Cargo.lock
generated
@ -55,7 +55,7 @@ dependencies = [
|
|||||||
"encoding_rs",
|
"encoding_rs",
|
||||||
"flate2",
|
"flate2",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"h2",
|
"h2 0.3.26",
|
||||||
"http 0.2.11",
|
"http 0.2.11",
|
||||||
"httparse",
|
"httparse",
|
||||||
"httpdate",
|
"httpdate",
|
||||||
@ -403,6 +403,16 @@ dependencies = [
|
|||||||
"thiserror",
|
"thiserror",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "assert-json-diff"
|
||||||
|
version = "2.0.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-trait"
|
name = "async-trait"
|
||||||
version = "0.1.81"
|
version = "0.1.81"
|
||||||
@ -414,6 +424,12 @@ dependencies = [
|
|||||||
"syn 2.0.60",
|
"syn 2.0.60",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "atomic-waker"
|
||||||
|
version = "1.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "autocfg"
|
name = "autocfg"
|
||||||
version = "1.2.0"
|
version = "1.2.0"
|
||||||
@ -1377,6 +1393,24 @@ dependencies = [
|
|||||||
"syn 2.0.60",
|
"syn 2.0.60",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "deadpool"
|
||||||
|
version = "0.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fb84100978c1c7b37f09ed3ce3e5f843af02c2a2c431bae5b19230dad2c1b490"
|
||||||
|
dependencies = [
|
||||||
|
"async-trait",
|
||||||
|
"deadpool-runtime",
|
||||||
|
"num_cpus",
|
||||||
|
"tokio",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "deadpool-runtime"
|
||||||
|
version = "0.1.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "debugid"
|
name = "debugid"
|
||||||
version = "0.8.0"
|
version = "0.8.0"
|
||||||
@ -2231,6 +2265,25 @@ dependencies = [
|
|||||||
"tracing",
|
"tracing",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "h2"
|
||||||
|
version = "0.4.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab"
|
||||||
|
dependencies = [
|
||||||
|
"atomic-waker",
|
||||||
|
"bytes",
|
||||||
|
"fnv",
|
||||||
|
"futures-core",
|
||||||
|
"futures-sink",
|
||||||
|
"http 1.1.0",
|
||||||
|
"indexmap",
|
||||||
|
"slab",
|
||||||
|
"tokio",
|
||||||
|
"tokio-util",
|
||||||
|
"tracing",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "half"
|
name = "half"
|
||||||
version = "1.8.2"
|
version = "1.8.2"
|
||||||
@ -2441,9 +2494,11 @@ dependencies = [
|
|||||||
"bytes",
|
"bytes",
|
||||||
"futures-channel",
|
"futures-channel",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
|
"h2 0.4.5",
|
||||||
"http 1.1.0",
|
"http 1.1.0",
|
||||||
"http-body",
|
"http-body",
|
||||||
"httparse",
|
"httparse",
|
||||||
|
"httpdate",
|
||||||
"itoa",
|
"itoa",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"smallvec",
|
"smallvec",
|
||||||
@ -3423,6 +3478,7 @@ dependencies = [
|
|||||||
"url",
|
"url",
|
||||||
"urlencoding",
|
"urlencoding",
|
||||||
"uuid",
|
"uuid",
|
||||||
|
"wiremock",
|
||||||
"yaup",
|
"yaup",
|
||||||
"zip 2.1.3",
|
"zip 2.1.3",
|
||||||
]
|
]
|
||||||
@ -6281,6 +6337,30 @@ dependencies = [
|
|||||||
"windows-sys 0.48.0",
|
"windows-sys 0.48.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wiremock"
|
||||||
|
version = "0.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ec874e1eef0df2dcac546057fe5e29186f09c378181cd7b635b4b7bcc98e9d81"
|
||||||
|
dependencies = [
|
||||||
|
"assert-json-diff",
|
||||||
|
"async-trait",
|
||||||
|
"base64 0.21.7",
|
||||||
|
"deadpool",
|
||||||
|
"futures",
|
||||||
|
"http 1.1.0",
|
||||||
|
"http-body-util",
|
||||||
|
"hyper",
|
||||||
|
"hyper-util",
|
||||||
|
"log",
|
||||||
|
"once_cell",
|
||||||
|
"regex",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
"tokio",
|
||||||
|
"url",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wyz"
|
name = "wyz"
|
||||||
version = "0.5.1"
|
version = "0.5.1"
|
||||||
|
@ -3047,6 +3047,8 @@ mod tests {
|
|||||||
api_key: Setting::Set(S("My super secret")),
|
api_key: Setting::Set(S("My super secret")),
|
||||||
url: Setting::Set(S("http://localhost:7777")),
|
url: Setting::Set(S("http://localhost:7777")),
|
||||||
dimensions: Setting::Set(4),
|
dimensions: Setting::Set(4),
|
||||||
|
request: Setting::Set(serde_json::json!("{{text}}")),
|
||||||
|
response: Setting::Set(serde_json::json!("{{embedding}}")),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
embedders.insert(S("default"), Setting::Set(embedding_settings));
|
embedders.insert(S("default"), Setting::Set(embedding_settings));
|
||||||
@ -5006,6 +5008,8 @@ mod tests {
|
|||||||
api_key: Setting::Set(S("My super secret")),
|
api_key: Setting::Set(S("My super secret")),
|
||||||
url: Setting::Set(S("http://localhost:7777")),
|
url: Setting::Set(S("http://localhost:7777")),
|
||||||
dimensions: Setting::Set(384),
|
dimensions: Setting::Set(384),
|
||||||
|
request: Setting::Set(serde_json::json!("{{text}}")),
|
||||||
|
response: Setting::Set(serde_json::json!("{{embedding}}")),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
embedders.insert(S("A_fakerest"), Setting::Set(embedding_settings));
|
embedders.insert(S("A_fakerest"), Setting::Set(embedding_settings));
|
||||||
|
@ -8,7 +8,9 @@ expression: task.details
|
|||||||
"source": "rest",
|
"source": "rest",
|
||||||
"apiKey": "MyXXXX...",
|
"apiKey": "MyXXXX...",
|
||||||
"dimensions": 384,
|
"dimensions": 384,
|
||||||
"url": "http://localhost:7777"
|
"url": "http://localhost:7777",
|
||||||
|
"request": "{{text}}",
|
||||||
|
"response": "{{embedding}}"
|
||||||
},
|
},
|
||||||
"B_small_hf": {
|
"B_small_hf": {
|
||||||
"source": "huggingFace",
|
"source": "huggingFace",
|
||||||
|
@ -8,16 +8,7 @@ expression: fakerest_config.embedder_options
|
|||||||
"distribution": null,
|
"distribution": null,
|
||||||
"dimensions": 384,
|
"dimensions": 384,
|
||||||
"url": "http://localhost:7777",
|
"url": "http://localhost:7777",
|
||||||
"query": null,
|
"request": "{{text}}",
|
||||||
"input_field": [
|
"response": "{{embedding}}"
|
||||||
"input"
|
|
||||||
],
|
|
||||||
"path_to_embeddings": [
|
|
||||||
"data"
|
|
||||||
],
|
|
||||||
"embedding_object": [
|
|
||||||
"embedding"
|
|
||||||
],
|
|
||||||
"input_type": "text"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,9 @@ expression: task.details
|
|||||||
"source": "rest",
|
"source": "rest",
|
||||||
"apiKey": "MyXXXX...",
|
"apiKey": "MyXXXX...",
|
||||||
"dimensions": 384,
|
"dimensions": 384,
|
||||||
"url": "http://localhost:7777"
|
"url": "http://localhost:7777",
|
||||||
|
"request": "{{text}}",
|
||||||
|
"response": "{{embedding}}"
|
||||||
},
|
},
|
||||||
"B_small_hf": {
|
"B_small_hf": {
|
||||||
"source": "huggingFace",
|
"source": "huggingFace",
|
||||||
|
@ -8,7 +8,9 @@ expression: task.details
|
|||||||
"source": "rest",
|
"source": "rest",
|
||||||
"apiKey": "MyXXXX...",
|
"apiKey": "MyXXXX...",
|
||||||
"dimensions": 4,
|
"dimensions": 4,
|
||||||
"url": "http://localhost:7777"
|
"url": "http://localhost:7777",
|
||||||
|
"request": "{{text}}",
|
||||||
|
"response": "{{embedding}}"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
---
|
---
|
||||||
source: index-scheduler/src/lib.rs
|
source: index-scheduler/src/lib.rs
|
||||||
expression: embedding_config.embedder_options
|
expression: config.embedder_options
|
||||||
---
|
---
|
||||||
{
|
{
|
||||||
"Rest": {
|
"Rest": {
|
||||||
@ -8,16 +8,7 @@ expression: embedding_config.embedder_options
|
|||||||
"distribution": null,
|
"distribution": null,
|
||||||
"dimensions": 4,
|
"dimensions": 4,
|
||||||
"url": "http://localhost:7777",
|
"url": "http://localhost:7777",
|
||||||
"query": null,
|
"request": "{{text}}",
|
||||||
"input_field": [
|
"response": "{{embedding}}"
|
||||||
"input"
|
|
||||||
],
|
|
||||||
"path_to_embeddings": [
|
|
||||||
"data"
|
|
||||||
],
|
|
||||||
"embedding_object": [
|
|
||||||
"embedding"
|
|
||||||
],
|
|
||||||
"input_type": "text"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,9 @@ expression: task.details
|
|||||||
"source": "rest",
|
"source": "rest",
|
||||||
"apiKey": "MyXXXX...",
|
"apiKey": "MyXXXX...",
|
||||||
"dimensions": 4,
|
"dimensions": 4,
|
||||||
"url": "http://localhost:7777"
|
"url": "http://localhost:7777",
|
||||||
|
"request": "{{text}}",
|
||||||
|
"response": "{{embedding}}"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
@ -46,4 +46,3 @@ doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1
|
|||||||
### File Store:
|
### File Store:
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
@ -45,4 +45,3 @@ doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1
|
|||||||
00000000-0000-0000-0000-000000000001
|
00000000-0000-0000-0000-000000000001
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
@ -42,4 +42,3 @@ doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1
|
|||||||
### File Store:
|
### File Store:
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
@ -41,4 +41,3 @@ doggos: { number_of_documents: 0, field_distribution: {} }
|
|||||||
00000000-0000-0000-0000-000000000000
|
00000000-0000-0000-0000-000000000000
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued [0,]
|
enqueued [0,]
|
||||||
@ -33,4 +33,3 @@ doggos [0,]
|
|||||||
### File Store:
|
### File Store:
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued []
|
enqueued []
|
||||||
@ -37,4 +37,3 @@ doggos: { number_of_documents: 0, field_distribution: {} }
|
|||||||
### File Store:
|
### File Store:
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued [0,]
|
enqueued [0,]
|
||||||
@ -33,4 +33,3 @@ doggos [0,]
|
|||||||
### File Store:
|
### File Store:
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued []
|
enqueued []
|
||||||
@ -37,4 +37,3 @@ doggos: { number_of_documents: 0, field_distribution: {} }
|
|||||||
### File Store:
|
### File Store:
|
||||||
|
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -114,6 +114,7 @@ maplit = "1.0.2"
|
|||||||
meili-snap = { path = "../meili-snap" }
|
meili-snap = { path = "../meili-snap" }
|
||||||
temp-env = "0.3.6"
|
temp-env = "0.3.6"
|
||||||
urlencoding = "2.1.3"
|
urlencoding = "2.1.3"
|
||||||
|
wiremock = "0.6.0"
|
||||||
yaup = "0.3.1"
|
yaup = "0.3.1"
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
|
@ -80,7 +80,14 @@ impl Display for Value {
|
|||||||
write!(
|
write!(
|
||||||
f,
|
f,
|
||||||
"{}",
|
"{}",
|
||||||
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]", ".processingTimeMs" => "[duration]" })
|
json_string!(self, {
|
||||||
|
".enqueuedAt" => "[date]",
|
||||||
|
".startedAt" => "[date]",
|
||||||
|
".finishedAt" => "[date]",
|
||||||
|
".duration" => "[duration]",
|
||||||
|
".processingTimeMs" => "[duration]",
|
||||||
|
".details.embedders.*.url" => "[url]"
|
||||||
|
})
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -497,7 +497,7 @@ async fn query_combination() {
|
|||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(response, @r###"
|
snapshot!(response, @r###"
|
||||||
{
|
{
|
||||||
"message": "Error while generating embeddings: user error: attempt to embed the following text in a configuration where embeddings must be user provided: \"Captain\"",
|
"message": "Error while generating embeddings: user error: attempt to embed the following text in a configuration where embeddings must be user provided:\n - `Captain`",
|
||||||
"code": "vector_embedding_error",
|
"code": "vector_embedding_error",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||||
|
@ -116,6 +116,8 @@ async fn secrets_are_hidden_in_settings() {
|
|||||||
"url": "https://localhost:7777",
|
"url": "https://localhost:7777",
|
||||||
"apiKey": "My super secret value you will never guess",
|
"apiKey": "My super secret value you will never guess",
|
||||||
"dimensions": 4,
|
"dimensions": 4,
|
||||||
|
"request": "{{text}}",
|
||||||
|
"response": "{{embedding}}"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}))
|
}))
|
||||||
@ -189,17 +191,8 @@ async fn secrets_are_hidden_in_settings() {
|
|||||||
"dimensions": 4,
|
"dimensions": 4,
|
||||||
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
|
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
|
||||||
"url": "https://localhost:7777",
|
"url": "https://localhost:7777",
|
||||||
"query": null,
|
"request": "{{text}}",
|
||||||
"inputField": [
|
"response": "{{embedding}}"
|
||||||
"input"
|
|
||||||
],
|
|
||||||
"pathToEmbeddings": [
|
|
||||||
"data"
|
|
||||||
],
|
|
||||||
"embeddingObject": [
|
|
||||||
"embedding"
|
|
||||||
],
|
|
||||||
"inputType": "text"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"searchCutoffMs": null
|
"searchCutoffMs": null
|
||||||
@ -215,7 +208,9 @@ async fn secrets_are_hidden_in_settings() {
|
|||||||
"source": "rest",
|
"source": "rest",
|
||||||
"apiKey": "My suXXXXXX...",
|
"apiKey": "My suXXXXXX...",
|
||||||
"dimensions": 4,
|
"dimensions": 4,
|
||||||
"url": "https://localhost:7777"
|
"url": "https://localhost:7777",
|
||||||
|
"request": "{{text}}",
|
||||||
|
"response": "{{embedding}}"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
mod rest;
|
||||||
mod settings;
|
mod settings;
|
||||||
|
|
||||||
use meili_snap::{json_string, snapshot};
|
use meili_snap::{json_string, snapshot};
|
||||||
@ -505,7 +506,7 @@ async fn user_provided_vectors_error() {
|
|||||||
"indexedDocuments": 0
|
"indexedDocuments": 0
|
||||||
},
|
},
|
||||||
"error": {
|
"error": {
|
||||||
"message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided: \" id: 42\\n name: kefir\\n _vectors: \\n _vectors.manual: \\n _vectors.manual.regenerate: \\n _vectors.manual.embeddings: \\n\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
|
"message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided:\n - ` id: 42\n name: kefir\n _vectors: \n _vectors.manual: \n _vectors.manual.regenerate: \n _vectors.manual.embeddings: \n`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
|
||||||
"code": "vector_embedding_error",
|
"code": "vector_embedding_error",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||||
@ -534,7 +535,7 @@ async fn user_provided_vectors_error() {
|
|||||||
"indexedDocuments": 0
|
"indexedDocuments": 0
|
||||||
},
|
},
|
||||||
"error": {
|
"error": {
|
||||||
"message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided: \" id: 42\\n name: kefir\\n _vectors: \\n _vectors.manual: \\n _vectors.manual.regenerate: \\n _vectors.manual.embeddings: \\n _vector: manaul000\\n _vector.manaul: \\n\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
|
"message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided:\n - ` id: 42\n name: kefir\n _vectors: \n _vectors.manual: \n _vectors.manual.regenerate: \n _vectors.manual.embeddings: \n _vector: manaul000\n _vector.manaul: \n`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
|
||||||
"code": "vector_embedding_error",
|
"code": "vector_embedding_error",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||||
@ -563,7 +564,7 @@ async fn user_provided_vectors_error() {
|
|||||||
"indexedDocuments": 0
|
"indexedDocuments": 0
|
||||||
},
|
},
|
||||||
"error": {
|
"error": {
|
||||||
"message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided: \" id: 42\\n name: kefir\\n _vectors: manaul000\\n _vectors.manual: \\n _vectors.manual.regenerate: \\n _vectors.manual.embeddings: \\n _vectors.manaul: \\n\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
|
"message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided:\n - ` id: 42\n name: kefir\n _vectors: manaul000\n _vectors.manual: \n _vectors.manual.regenerate: \n _vectors.manual.embeddings: \n _vectors.manaul: \n`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
|
||||||
"code": "vector_embedding_error",
|
"code": "vector_embedding_error",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||||
|
1734
meilisearch/tests/vector/rest.rs
Normal file
1734
meilisearch/tests/vector/rest.rs
Normal file
File diff suppressed because it is too large
Load Diff
@ -2741,11 +2741,8 @@ mod tests {
|
|||||||
dimensions: Setting::Set(3),
|
dimensions: Setting::Set(3),
|
||||||
document_template: Setting::NotSet,
|
document_template: Setting::NotSet,
|
||||||
url: Setting::NotSet,
|
url: Setting::NotSet,
|
||||||
query: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
input_field: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
path_to_embeddings: Setting::NotSet,
|
|
||||||
embedding_object: Setting::NotSet,
|
|
||||||
input_type: Setting::NotSet,
|
|
||||||
distribution: Setting::NotSet,
|
distribution: Setting::NotSet,
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
@ -1484,11 +1484,8 @@ fn validate_prompt(
|
|||||||
dimensions,
|
dimensions,
|
||||||
document_template: Setting::Set(template),
|
document_template: Setting::Set(template),
|
||||||
url,
|
url,
|
||||||
query,
|
request,
|
||||||
input_field,
|
response,
|
||||||
path_to_embeddings,
|
|
||||||
embedding_object,
|
|
||||||
input_type,
|
|
||||||
distribution,
|
distribution,
|
||||||
}) => {
|
}) => {
|
||||||
// validate
|
// validate
|
||||||
@ -1504,11 +1501,8 @@ fn validate_prompt(
|
|||||||
dimensions,
|
dimensions,
|
||||||
document_template: Setting::Set(template),
|
document_template: Setting::Set(template),
|
||||||
url,
|
url,
|
||||||
query,
|
request,
|
||||||
input_field,
|
response,
|
||||||
path_to_embeddings,
|
|
||||||
embedding_object,
|
|
||||||
input_type,
|
|
||||||
distribution,
|
distribution,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
@ -1530,11 +1524,8 @@ pub fn validate_embedding_settings(
|
|||||||
dimensions,
|
dimensions,
|
||||||
document_template,
|
document_template,
|
||||||
url,
|
url,
|
||||||
query,
|
request,
|
||||||
input_field,
|
response,
|
||||||
path_to_embeddings,
|
|
||||||
embedding_object,
|
|
||||||
input_type,
|
|
||||||
distribution,
|
distribution,
|
||||||
} = settings;
|
} = settings;
|
||||||
|
|
||||||
@ -1553,6 +1544,15 @@ pub fn validate_embedding_settings(
|
|||||||
})?;
|
})?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(request) = request.as_ref().set() {
|
||||||
|
let request = crate::vector::rest::Request::new(request.to_owned())
|
||||||
|
.map_err(|error| crate::UserError::VectorEmbeddingError(error.into()))?;
|
||||||
|
if let Some(response) = response.as_ref().set() {
|
||||||
|
crate::vector::rest::Response::new(response.to_owned(), &request)
|
||||||
|
.map_err(|error| crate::UserError::VectorEmbeddingError(error.into()))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let Some(inferred_source) = source.set() else {
|
let Some(inferred_source) = source.set() else {
|
||||||
return Ok(Setting::Set(EmbeddingSettings {
|
return Ok(Setting::Set(EmbeddingSettings {
|
||||||
source,
|
source,
|
||||||
@ -1562,11 +1562,8 @@ pub fn validate_embedding_settings(
|
|||||||
dimensions,
|
dimensions,
|
||||||
document_template,
|
document_template,
|
||||||
url,
|
url,
|
||||||
query,
|
request,
|
||||||
input_field,
|
response,
|
||||||
path_to_embeddings,
|
|
||||||
embedding_object,
|
|
||||||
input_type,
|
|
||||||
distribution,
|
distribution,
|
||||||
}));
|
}));
|
||||||
};
|
};
|
||||||
@ -1574,21 +1571,8 @@ pub fn validate_embedding_settings(
|
|||||||
EmbedderSource::OpenAi => {
|
EmbedderSource::OpenAi => {
|
||||||
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
|
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
|
||||||
|
|
||||||
check_unset(&query, EmbeddingSettings::QUERY, inferred_source, name)?;
|
check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
|
||||||
check_unset(&input_field, EmbeddingSettings::INPUT_FIELD, inferred_source, name)?;
|
check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
|
||||||
check_unset(
|
|
||||||
&path_to_embeddings,
|
|
||||||
EmbeddingSettings::PATH_TO_EMBEDDINGS,
|
|
||||||
inferred_source,
|
|
||||||
name,
|
|
||||||
)?;
|
|
||||||
check_unset(
|
|
||||||
&embedding_object,
|
|
||||||
EmbeddingSettings::EMBEDDING_OBJECT,
|
|
||||||
inferred_source,
|
|
||||||
name,
|
|
||||||
)?;
|
|
||||||
check_unset(&input_type, EmbeddingSettings::INPUT_TYPE, inferred_source, name)?;
|
|
||||||
|
|
||||||
if let Setting::Set(model) = &model {
|
if let Setting::Set(model) = &model {
|
||||||
let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str())
|
let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str())
|
||||||
@ -1626,42 +1610,16 @@ pub fn validate_embedding_settings(
|
|||||||
check_set(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
|
check_set(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
|
||||||
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
|
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
|
||||||
|
|
||||||
check_unset(&query, EmbeddingSettings::QUERY, inferred_source, name)?;
|
check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
|
||||||
check_unset(&input_field, EmbeddingSettings::INPUT_FIELD, inferred_source, name)?;
|
check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
|
||||||
check_unset(
|
|
||||||
&path_to_embeddings,
|
|
||||||
EmbeddingSettings::PATH_TO_EMBEDDINGS,
|
|
||||||
inferred_source,
|
|
||||||
name,
|
|
||||||
)?;
|
|
||||||
check_unset(
|
|
||||||
&embedding_object,
|
|
||||||
EmbeddingSettings::EMBEDDING_OBJECT,
|
|
||||||
inferred_source,
|
|
||||||
name,
|
|
||||||
)?;
|
|
||||||
check_unset(&input_type, EmbeddingSettings::INPUT_TYPE, inferred_source, name)?;
|
|
||||||
}
|
}
|
||||||
EmbedderSource::HuggingFace => {
|
EmbedderSource::HuggingFace => {
|
||||||
check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?;
|
check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?;
|
||||||
check_unset(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?;
|
check_unset(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?;
|
||||||
|
|
||||||
check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
|
check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
|
||||||
check_unset(&query, EmbeddingSettings::QUERY, inferred_source, name)?;
|
check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
|
||||||
check_unset(&input_field, EmbeddingSettings::INPUT_FIELD, inferred_source, name)?;
|
check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
|
||||||
check_unset(
|
|
||||||
&path_to_embeddings,
|
|
||||||
EmbeddingSettings::PATH_TO_EMBEDDINGS,
|
|
||||||
inferred_source,
|
|
||||||
name,
|
|
||||||
)?;
|
|
||||||
check_unset(
|
|
||||||
&embedding_object,
|
|
||||||
EmbeddingSettings::EMBEDDING_OBJECT,
|
|
||||||
inferred_source,
|
|
||||||
name,
|
|
||||||
)?;
|
|
||||||
check_unset(&input_type, EmbeddingSettings::INPUT_TYPE, inferred_source, name)?;
|
|
||||||
}
|
}
|
||||||
EmbedderSource::UserProvided => {
|
EmbedderSource::UserProvided => {
|
||||||
check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
|
check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
|
||||||
@ -1676,26 +1634,15 @@ pub fn validate_embedding_settings(
|
|||||||
check_set(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?;
|
check_set(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?;
|
||||||
|
|
||||||
check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
|
check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
|
||||||
check_unset(&query, EmbeddingSettings::QUERY, inferred_source, name)?;
|
check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
|
||||||
check_unset(&input_field, EmbeddingSettings::INPUT_FIELD, inferred_source, name)?;
|
check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
|
||||||
check_unset(
|
|
||||||
&path_to_embeddings,
|
|
||||||
EmbeddingSettings::PATH_TO_EMBEDDINGS,
|
|
||||||
inferred_source,
|
|
||||||
name,
|
|
||||||
)?;
|
|
||||||
check_unset(
|
|
||||||
&embedding_object,
|
|
||||||
EmbeddingSettings::EMBEDDING_OBJECT,
|
|
||||||
inferred_source,
|
|
||||||
name,
|
|
||||||
)?;
|
|
||||||
check_unset(&input_type, EmbeddingSettings::INPUT_TYPE, inferred_source, name)?;
|
|
||||||
}
|
}
|
||||||
EmbedderSource::Rest => {
|
EmbedderSource::Rest => {
|
||||||
check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
|
check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
|
||||||
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
|
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
|
||||||
check_set(&url, EmbeddingSettings::URL, inferred_source, name)?;
|
check_set(&url, EmbeddingSettings::URL, inferred_source, name)?;
|
||||||
|
check_set(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
|
||||||
|
check_set(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(Setting::Set(EmbeddingSettings {
|
Ok(Setting::Set(EmbeddingSettings {
|
||||||
@ -1706,11 +1653,8 @@ pub fn validate_embedding_settings(
|
|||||||
dimensions,
|
dimensions,
|
||||||
document_template,
|
document_template,
|
||||||
url,
|
url,
|
||||||
query,
|
request,
|
||||||
input_field,
|
response,
|
||||||
path_to_embeddings,
|
|
||||||
embedding_object,
|
|
||||||
input_type,
|
|
||||||
distribution,
|
distribution,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@ use std::path::PathBuf;
|
|||||||
use hf_hub::api::sync::ApiError;
|
use hf_hub::api::sync::ApiError;
|
||||||
|
|
||||||
use super::parsed_vectors::ParsedVectorsDiff;
|
use super::parsed_vectors::ParsedVectorsDiff;
|
||||||
|
use super::rest::ConfigurationSource;
|
||||||
use crate::error::FaultSource;
|
use crate::error::FaultSource;
|
||||||
use crate::{FieldDistribution, PanicCatched};
|
use crate::{FieldDistribution, PanicCatched};
|
||||||
|
|
||||||
@ -45,48 +46,57 @@ pub struct EmbedError {
|
|||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
pub enum EmbedErrorKind {
|
pub enum EmbedErrorKind {
|
||||||
#[error("could not tokenize: {0}")]
|
#[error("could not tokenize:\n - {0}")]
|
||||||
Tokenize(Box<dyn std::error::Error + Send + Sync>),
|
Tokenize(Box<dyn std::error::Error + Send + Sync>),
|
||||||
#[error("unexpected tensor shape: {0}")]
|
#[error("unexpected tensor shape:\n - {0}")]
|
||||||
TensorShape(candle_core::Error),
|
TensorShape(candle_core::Error),
|
||||||
#[error("unexpected tensor value: {0}")]
|
#[error("unexpected tensor value:\n - {0}")]
|
||||||
TensorValue(candle_core::Error),
|
TensorValue(candle_core::Error),
|
||||||
#[error("could not run model: {0}")]
|
#[error("could not run model:\n - {0}")]
|
||||||
ModelForward(candle_core::Error),
|
ModelForward(candle_core::Error),
|
||||||
#[error("attempt to embed the following text in a configuration where embeddings must be user provided: {0:?}")]
|
#[error("attempt to embed the following text in a configuration where embeddings must be user provided:\n - `{0}`")]
|
||||||
ManualEmbed(String),
|
ManualEmbed(String),
|
||||||
#[error("model not found. Meilisearch will not automatically download models from the Ollama library, please pull the model manually: {0:?}")]
|
#[error("model not found. Meilisearch will not automatically download models from the Ollama library, please pull the model manually{}", option_info(.0.as_deref(), "server replied with "))]
|
||||||
OllamaModelNotFoundError(Option<String>),
|
OllamaModelNotFoundError(Option<String>),
|
||||||
#[error("error deserialization the response body as JSON: {0}")]
|
#[error("error deserialization the response body as JSON:\n - {0}")]
|
||||||
RestResponseDeserialization(std::io::Error),
|
RestResponseDeserialization(std::io::Error),
|
||||||
#[error("component `{0}` not found in path `{1}` in response: `{2}`")]
|
|
||||||
RestResponseMissingEmbeddings(String, String, String),
|
|
||||||
#[error("unexpected format of the embedding response: {0}")]
|
|
||||||
RestResponseFormat(serde_json::Error),
|
|
||||||
#[error("expected a response containing {0} embeddings, got only {1}")]
|
#[error("expected a response containing {0} embeddings, got only {1}")]
|
||||||
RestResponseEmbeddingCount(usize, usize),
|
RestResponseEmbeddingCount(usize, usize),
|
||||||
#[error("could not authenticate against embedding server: {0:?}")]
|
#[error("could not authenticate against embedding server{}", option_info(.0.as_deref(), "server replied with "))]
|
||||||
RestUnauthorized(Option<String>),
|
RestUnauthorized(Option<String>),
|
||||||
#[error("sent too many requests to embedding server: {0:?}")]
|
#[error("sent too many requests to embedding server{}", option_info(.0.as_deref(), "server replied with "))]
|
||||||
RestTooManyRequests(Option<String>),
|
RestTooManyRequests(Option<String>),
|
||||||
#[error("sent a bad request to embedding server: {0:?}")]
|
#[error("sent a bad request to embedding server{}{}",
|
||||||
RestBadRequest(Option<String>),
|
if ConfigurationSource::User == *.1 {
|
||||||
#[error("received internal error from embedding server: {0:?}")]
|
"\n - Hint: check that the `request` in the embedder configuration matches the remote server's API"
|
||||||
|
} else {
|
||||||
|
""
|
||||||
|
},
|
||||||
|
option_info(.0.as_deref(), "server replied with "))]
|
||||||
|
RestBadRequest(Option<String>, ConfigurationSource),
|
||||||
|
#[error("received internal error HTTP {0} from embedding server{}", option_info(.1.as_deref(), "server replied with "))]
|
||||||
RestInternalServerError(u16, Option<String>),
|
RestInternalServerError(u16, Option<String>),
|
||||||
#[error("received HTTP {0} from embedding server: {0:?}")]
|
#[error("received unexpected HTTP {0} from embedding server{}", option_info(.1.as_deref(), "server replied with "))]
|
||||||
RestOtherStatusCode(u16, Option<String>),
|
RestOtherStatusCode(u16, Option<String>),
|
||||||
#[error("could not reach embedding server: {0}")]
|
#[error("could not reach embedding server:\n - {0}")]
|
||||||
RestNetwork(ureq::Transport),
|
RestNetwork(ureq::Transport),
|
||||||
#[error("was expected '{}' to be an object in query '{0}'", .1.join("."))]
|
#[error("error extracting embeddings from the response:\n - {0}")]
|
||||||
RestNotAnObject(serde_json::Value, Vec<String>),
|
RestExtractionError(String),
|
||||||
#[error("while embedding tokenized, was expecting embeddings of dimension `{0}`, got embeddings of dimensions `{1}`")]
|
#[error("was expecting embeddings of dimension `{0}`, got embeddings of dimensions `{1}`")]
|
||||||
OpenAiUnexpectedDimension(usize, usize),
|
UnexpectedDimension(usize, usize),
|
||||||
#[error("no embedding was produced")]
|
#[error("no embedding was produced")]
|
||||||
MissingEmbedding,
|
MissingEmbedding,
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
PanicInThreadPool(#[from] PanicCatched),
|
PanicInThreadPool(#[from] PanicCatched),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Formats an optional piece of extra information as an additional bullet line.
///
/// Returns a line break followed by a ` - ` bullet, `prefix`, and `info` wrapped in backticks
/// when `info` is `Some`; returns an empty string when `info` is `None`.
fn option_info(info: Option<&str>, prefix: &str) -> String {
    info.map(|details| format!("\n - {prefix}`{details}`")).unwrap_or_default()
}
|
||||||
|
|
||||||
impl EmbedError {
|
impl EmbedError {
|
||||||
pub fn tokenize(inner: Box<dyn std::error::Error + Send + Sync>) -> Self {
|
pub fn tokenize(inner: Box<dyn std::error::Error + Send + Sync>) -> Self {
|
||||||
Self { kind: EmbedErrorKind::Tokenize(inner), fault: FaultSource::Runtime }
|
Self { kind: EmbedErrorKind::Tokenize(inner), fault: FaultSource::Runtime }
|
||||||
@ -119,28 +129,6 @@ impl EmbedError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn rest_response_missing_embeddings<S: AsRef<str>>(
|
|
||||||
response: serde_json::Value,
|
|
||||||
component: &str,
|
|
||||||
response_field: &[S],
|
|
||||||
) -> EmbedError {
|
|
||||||
let response_field: Vec<&str> = response_field.iter().map(AsRef::as_ref).collect();
|
|
||||||
let response_field = response_field.join(".");
|
|
||||||
|
|
||||||
Self {
|
|
||||||
kind: EmbedErrorKind::RestResponseMissingEmbeddings(
|
|
||||||
component.to_owned(),
|
|
||||||
response_field,
|
|
||||||
serde_json::to_string_pretty(&response).unwrap_or_default(),
|
|
||||||
),
|
|
||||||
fault: FaultSource::Undecided,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn rest_response_format(error: serde_json::Error) -> EmbedError {
|
|
||||||
Self { kind: EmbedErrorKind::RestResponseFormat(error), fault: FaultSource::Undecided }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn rest_response_embedding_count(expected: usize, got: usize) -> EmbedError {
|
pub(crate) fn rest_response_embedding_count(expected: usize, got: usize) -> EmbedError {
|
||||||
Self {
|
Self {
|
||||||
kind: EmbedErrorKind::RestResponseEmbeddingCount(expected, got),
|
kind: EmbedErrorKind::RestResponseEmbeddingCount(expected, got),
|
||||||
@ -159,8 +147,14 @@ impl EmbedError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn rest_bad_request(error_response: Option<String>) -> EmbedError {
|
pub(crate) fn rest_bad_request(
|
||||||
Self { kind: EmbedErrorKind::RestBadRequest(error_response), fault: FaultSource::User }
|
error_response: Option<String>,
|
||||||
|
configuration_source: ConfigurationSource,
|
||||||
|
) -> EmbedError {
|
||||||
|
Self {
|
||||||
|
kind: EmbedErrorKind::RestBadRequest(error_response, configuration_source),
|
||||||
|
fault: FaultSource::User,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn rest_internal_server_error(
|
pub(crate) fn rest_internal_server_error(
|
||||||
@ -184,22 +178,19 @@ impl EmbedError {
|
|||||||
Self { kind: EmbedErrorKind::RestNetwork(transport), fault: FaultSource::Runtime }
|
Self { kind: EmbedErrorKind::RestNetwork(transport), fault: FaultSource::Runtime }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn rest_not_an_object(
|
pub(crate) fn rest_unexpected_dimension(expected: usize, got: usize) -> EmbedError {
|
||||||
query: serde_json::Value,
|
|
||||||
input_path: Vec<String>,
|
|
||||||
) -> EmbedError {
|
|
||||||
Self { kind: EmbedErrorKind::RestNotAnObject(query, input_path), fault: FaultSource::User }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn openai_unexpected_dimension(expected: usize, got: usize) -> EmbedError {
|
|
||||||
Self {
|
Self {
|
||||||
kind: EmbedErrorKind::OpenAiUnexpectedDimension(expected, got),
|
kind: EmbedErrorKind::UnexpectedDimension(expected, got),
|
||||||
fault: FaultSource::Runtime,
|
fault: FaultSource::Runtime,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub(crate) fn missing_embedding() -> EmbedError {
|
pub(crate) fn missing_embedding() -> EmbedError {
|
||||||
Self { kind: EmbedErrorKind::MissingEmbedding, fault: FaultSource::Undecided }
|
Self { kind: EmbedErrorKind::MissingEmbedding, fault: FaultSource::Undecided }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn rest_extraction_error(error: String) -> EmbedError {
|
||||||
|
Self { kind: EmbedErrorKind::RestExtractionError(error), fault: FaultSource::Runtime }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
@ -290,10 +281,17 @@ impl NewEmbedderError {
|
|||||||
fault: FaultSource::Runtime,
|
fault: FaultSource::Runtime,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn rest_could_not_parse_template(message: String) -> NewEmbedderError {
|
||||||
|
Self {
|
||||||
|
kind: NewEmbedderErrorKind::CouldNotParseTemplate(message),
|
||||||
|
fault: FaultSource::User,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
#[error("could not open config at {filename:?}: {inner}")]
|
#[error("could not open config at {filename}: {inner}")]
|
||||||
pub struct OpenConfig {
|
pub struct OpenConfig {
|
||||||
pub filename: PathBuf,
|
pub filename: PathBuf,
|
||||||
pub inner: std::io::Error,
|
pub inner: std::io::Error,
|
||||||
@ -339,18 +337,20 @@ pub enum NewEmbedderErrorKind {
|
|||||||
UnsupportedModel(UnsupportedModel),
|
UnsupportedModel(UnsupportedModel),
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
OpenTokenizer(OpenTokenizer),
|
OpenTokenizer(OpenTokenizer),
|
||||||
#[error("could not build weights from Pytorch weights: {0}")]
|
#[error("could not build weights from Pytorch weights:\n - {0}")]
|
||||||
PytorchWeight(candle_core::Error),
|
PytorchWeight(candle_core::Error),
|
||||||
#[error("could not build weights from Safetensor weights: {0}")]
|
#[error("could not build weights from Safetensor weights:\n - {0}")]
|
||||||
SafetensorWeight(candle_core::Error),
|
SafetensorWeight(candle_core::Error),
|
||||||
#[error("could not spawn HG_HUB API client: {0}")]
|
#[error("could not spawn HG_HUB API client:\n - {0}")]
|
||||||
NewApiFail(ApiError),
|
NewApiFail(ApiError),
|
||||||
#[error("fetching file from HG_HUB failed: {0}")]
|
#[error("fetching file from HG_HUB failed:\n - {0}")]
|
||||||
ApiGet(ApiError),
|
ApiGet(ApiError),
|
||||||
#[error("could not determine model dimensions: test embedding failed with {0}")]
|
#[error("could not determine model dimensions:\n - test embedding failed with {0}")]
|
||||||
CouldNotDetermineDimension(EmbedError),
|
CouldNotDetermineDimension(EmbedError),
|
||||||
#[error("loading model failed: {0}")]
|
#[error("loading model failed:\n - {0}")]
|
||||||
LoadModel(candle_core::Error),
|
LoadModel(candle_core::Error),
|
||||||
|
#[error("{0}")]
|
||||||
|
CouldNotParseTemplate(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct PossibleEmbeddingMistakes {
|
pub struct PossibleEmbeddingMistakes {
|
||||||
|
970
milli/src/vector/json_template.rs
Normal file
970
milli/src/vector/json_template.rs
Normal file
@ -0,0 +1,970 @@
|
|||||||
|
//! Module to manipulate JSON templates.
|
||||||
|
//!
|
||||||
|
//! This module allows two main operations:
|
||||||
|
//! 1. Render JSON values from a template and a context value.
|
||||||
|
//! 2. Retrieve data from a template and JSON values.
|
||||||
|
|
||||||
|
#![warn(rustdoc::broken_intra_doc_links)]
|
||||||
|
#![warn(missing_docs)]
|
||||||
|
|
||||||
|
use serde::Deserialize;
|
||||||
|
use serde_json::{Map, Value};
|
||||||
|
|
||||||
|
type ValuePath = Vec<PathComponent>;
|
||||||
|
|
||||||
|
/// Encapsulates a JSON template and allows injecting and extracting values from it.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct ValueTemplate {
|
||||||
|
template: Value,
|
||||||
|
value_kind: ValueKind,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum ValueKind {
|
||||||
|
Single(ValuePath),
|
||||||
|
Array(ArrayPath),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct ArrayPath {
|
||||||
|
repeated_value: Value,
|
||||||
|
path_to_array: ValuePath,
|
||||||
|
value_path_in_array: ValuePath,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Component of a path to a Value
///
/// Equality is derived: two components are equal when they are the same variant
/// and their payloads (key or index) are equal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathComponent {
    /// A key inside of an object
    MapKey(String),
    /// An index inside of an array
    ArrayIndex(usize),
}
|
||||||
|
|
||||||
|
/// Error that occurs when no value, or too few values, were provided to a template for injection.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct MissingValue;
|
||||||
|
|
||||||
|
/// Error that occurs when trying to parse a template in [`ValueTemplate::new`]
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum TemplateParsingError {
|
||||||
|
/// A repeat string appears inside a repeated value
|
||||||
|
NestedRepeatString(ValuePath),
|
||||||
|
/// A repeat string appears outside of an array
|
||||||
|
RepeatStringNotInArray(ValuePath),
|
||||||
|
/// A repeat string appears in an array, but not in the second position
|
||||||
|
BadIndexForRepeatString(ValuePath, usize),
|
||||||
|
/// A repeated value lacks a placeholder
|
||||||
|
MissingPlaceholderInRepeatedValue(ValuePath),
|
||||||
|
/// Multiple repeat strings appear in the template
|
||||||
|
MultipleRepeatString(ValuePath, ValuePath),
|
||||||
|
/// Multiple placeholder strings appear in the template
|
||||||
|
MultiplePlaceholderString(ValuePath, ValuePath),
|
||||||
|
/// No placeholder string appears in the template
|
||||||
|
MissingPlaceholderString,
|
||||||
|
/// A placeholder appears both inside a repeated value and outside of it
|
||||||
|
BothArrayAndSingle {
|
||||||
|
/// Path to the single value
|
||||||
|
single_path: ValuePath,
|
||||||
|
/// Path to the array of repeated values
|
||||||
|
path_to_array: ValuePath,
|
||||||
|
/// Path to placeholder inside each repeated value, starting from the array
|
||||||
|
array_to_placeholder: ValuePath,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TemplateParsingError {
|
||||||
|
/// Produce an error message from the error kind, the name of the root object, the placeholder string and the repeat string
|
||||||
|
pub fn error_message(&self, root: &str, placeholder: &str, repeat: &str) -> String {
|
||||||
|
match self {
|
||||||
|
TemplateParsingError::NestedRepeatString(path) => {
|
||||||
|
format!(
|
||||||
|
r#"in {}: "{repeat}" appears nested inside of a value that is itself repeated"#,
|
||||||
|
path_with_root(root, path)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
TemplateParsingError::RepeatStringNotInArray(path) => format!(
|
||||||
|
r#"in {}: "{repeat}" appears outside of an array"#,
|
||||||
|
path_with_root(root, path)
|
||||||
|
),
|
||||||
|
TemplateParsingError::BadIndexForRepeatString(path, index) => format!(
|
||||||
|
r#"in {}: "{repeat}" expected at position #1, but found at position #{index}"#,
|
||||||
|
path_with_root(root, path)
|
||||||
|
),
|
||||||
|
TemplateParsingError::MissingPlaceholderInRepeatedValue(path) => format!(
|
||||||
|
r#"in {}: Expected "{placeholder}" inside of the repeated value"#,
|
||||||
|
path_with_root(root, path)
|
||||||
|
),
|
||||||
|
TemplateParsingError::MultipleRepeatString(current, previous) => format!(
|
||||||
|
r#"in {}: Found "{repeat}", but it was already present in {}"#,
|
||||||
|
path_with_root(root, current),
|
||||||
|
path_with_root(root, previous)
|
||||||
|
),
|
||||||
|
TemplateParsingError::MultiplePlaceholderString(current, previous) => format!(
|
||||||
|
r#"in {}: Found "{placeholder}", but it was already present in {}"#,
|
||||||
|
path_with_root(root, current),
|
||||||
|
path_with_root(root, previous)
|
||||||
|
),
|
||||||
|
TemplateParsingError::MissingPlaceholderString => {
|
||||||
|
format!(r#"in `{root}`: "{placeholder}" not found"#)
|
||||||
|
}
|
||||||
|
TemplateParsingError::BothArrayAndSingle {
|
||||||
|
single_path,
|
||||||
|
path_to_array,
|
||||||
|
array_to_placeholder,
|
||||||
|
} => {
|
||||||
|
let path_to_first_repeated = path_to_array
|
||||||
|
.iter()
|
||||||
|
.chain(std::iter::once(&PathComponent::ArrayIndex(0)))
|
||||||
|
.chain(array_to_placeholder.iter());
|
||||||
|
format!(
|
||||||
|
r#"in {}: Found "{placeholder}", but it was already present in {} (repeated)"#,
|
||||||
|
path_with_root(root, single_path),
|
||||||
|
path_with_root(root, path_to_first_repeated)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn prepend_path(self, mut prepended_path: ValuePath) -> Self {
|
||||||
|
match self {
|
||||||
|
TemplateParsingError::NestedRepeatString(mut path) => {
|
||||||
|
prepended_path.append(&mut path);
|
||||||
|
TemplateParsingError::NestedRepeatString(prepended_path)
|
||||||
|
}
|
||||||
|
TemplateParsingError::RepeatStringNotInArray(mut path) => {
|
||||||
|
prepended_path.append(&mut path);
|
||||||
|
TemplateParsingError::RepeatStringNotInArray(prepended_path)
|
||||||
|
}
|
||||||
|
TemplateParsingError::BadIndexForRepeatString(mut path, index) => {
|
||||||
|
prepended_path.append(&mut path);
|
||||||
|
TemplateParsingError::BadIndexForRepeatString(prepended_path, index)
|
||||||
|
}
|
||||||
|
TemplateParsingError::MissingPlaceholderInRepeatedValue(mut path) => {
|
||||||
|
prepended_path.append(&mut path);
|
||||||
|
TemplateParsingError::MissingPlaceholderInRepeatedValue(prepended_path)
|
||||||
|
}
|
||||||
|
TemplateParsingError::MultipleRepeatString(mut path, older_path) => {
|
||||||
|
let older_prepended_path =
|
||||||
|
prepended_path.iter().cloned().chain(older_path).collect();
|
||||||
|
prepended_path.append(&mut path);
|
||||||
|
TemplateParsingError::MultipleRepeatString(prepended_path, older_prepended_path)
|
||||||
|
}
|
||||||
|
TemplateParsingError::MultiplePlaceholderString(mut path, older_path) => {
|
||||||
|
let older_prepended_path =
|
||||||
|
prepended_path.iter().cloned().chain(older_path).collect();
|
||||||
|
prepended_path.append(&mut path);
|
||||||
|
TemplateParsingError::MultiplePlaceholderString(
|
||||||
|
prepended_path,
|
||||||
|
older_prepended_path,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
TemplateParsingError::MissingPlaceholderString => {
|
||||||
|
TemplateParsingError::MissingPlaceholderString
|
||||||
|
}
|
||||||
|
TemplateParsingError::BothArrayAndSingle {
|
||||||
|
single_path,
|
||||||
|
mut path_to_array,
|
||||||
|
array_to_placeholder,
|
||||||
|
} => {
|
||||||
|
// note, this case is not super logical, but is also likely to be dead code
|
||||||
|
let single_prepended_path =
|
||||||
|
prepended_path.iter().cloned().chain(single_path).collect();
|
||||||
|
prepended_path.append(&mut path_to_array);
|
||||||
|
// we don't prepend the array_to_placeholder path as it is the array path that is prepended
|
||||||
|
TemplateParsingError::BothArrayAndSingle {
|
||||||
|
single_path: single_prepended_path,
|
||||||
|
path_to_array: prepended_path,
|
||||||
|
array_to_placeholder,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Error that occurs when [`ValueTemplate::extract`] fails.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct ExtractionError {
|
||||||
|
/// The cause of the failure
|
||||||
|
pub kind: ExtractionErrorKind,
|
||||||
|
/// The context where the failure happened: the operation that failed
|
||||||
|
pub context: ExtractionErrorContext,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ExtractionError {
|
||||||
|
/// Produce an error message from the error, the name of the root object, the placeholder string and the expected value type
|
||||||
|
pub fn error_message(
|
||||||
|
&self,
|
||||||
|
root: &str,
|
||||||
|
placeholder: &str,
|
||||||
|
expected_value_type: &str,
|
||||||
|
) -> String {
|
||||||
|
let context = match &self.context {
|
||||||
|
ExtractionErrorContext::ExtractingSingleValue => {
|
||||||
|
format!(r#"extracting a single "{placeholder}""#)
|
||||||
|
}
|
||||||
|
ExtractionErrorContext::FindingPathToArray => {
|
||||||
|
format!(r#"extracting the array of "{placeholder}"s"#)
|
||||||
|
}
|
||||||
|
ExtractionErrorContext::ExtractingArrayItem(index) => {
|
||||||
|
format!(r#"extracting item #{index} from the array of "{placeholder}"s"#)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
match &self.kind {
|
||||||
|
ExtractionErrorKind::MissingPathComponent { missing_index, path, key_suggestion } => {
|
||||||
|
let last_named_object = last_named_object(root, path.iter().take(*missing_index));
|
||||||
|
format!(
|
||||||
|
"in {}, while {context}, configuration expects {}, which is missing in response{}",
|
||||||
|
path_with_root(root, path.iter().take(*missing_index)),
|
||||||
|
missing_component(path.get(*missing_index)),
|
||||||
|
match key_suggestion {
|
||||||
|
Some(key_suggestion) => format!("\n - Hint: {last_named_object} has key `{key_suggestion}`, did you mean {} in embedder configuration?",
|
||||||
|
path_with_root(root, path.iter().take(*missing_index).chain(std::iter::once(&PathComponent::MapKey(key_suggestion.to_owned()))))),
|
||||||
|
None => "".to_owned(),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
}
|
||||||
|
ExtractionErrorKind::WrongPathComponent { wrong_component, index, path } => {
|
||||||
|
let last_named_object = last_named_object(root, path.iter().take(*index));
|
||||||
|
format!(
|
||||||
|
"in {}, while {context}, configuration expects {last_named_object} to be {} but server sent {wrong_component}",
|
||||||
|
path_with_root(root, path.iter().take(*index)),
|
||||||
|
expected_component(path.get(*index))
|
||||||
|
)
|
||||||
|
}
|
||||||
|
ExtractionErrorKind::DeserializationError { error, path } => {
|
||||||
|
let last_named_object = last_named_object(root, path);
|
||||||
|
format!(
|
||||||
|
"in {}, while {context}, expected {last_named_object} to be {expected_value_type}, but failed to parse server response:\n - {error}",
|
||||||
|
path_with_root(root, path)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn missing_component(component: Option<&PathComponent>) -> String {
|
||||||
|
match component {
|
||||||
|
Some(PathComponent::ArrayIndex(index)) => {
|
||||||
|
format!(r#"item #{index}"#)
|
||||||
|
}
|
||||||
|
Some(PathComponent::MapKey(key)) => {
|
||||||
|
format!(r#"key "{key}""#)
|
||||||
|
}
|
||||||
|
None => "unknown".to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn expected_component(component: Option<&PathComponent>) -> String {
|
||||||
|
match component {
|
||||||
|
Some(PathComponent::ArrayIndex(index)) => {
|
||||||
|
format!(r#"an array with at least {} item(s)"#, index.saturating_add(1))
|
||||||
|
}
|
||||||
|
Some(PathComponent::MapKey(key)) => {
|
||||||
|
format!("an object with key `{}`", key)
|
||||||
|
}
|
||||||
|
None => "unknown".to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn last_named_object<'a>(
|
||||||
|
root: &'a str,
|
||||||
|
path: impl IntoIterator<Item = &'a PathComponent> + 'a,
|
||||||
|
) -> LastNamedObject<'a> {
|
||||||
|
let mut last_named_object = LastNamedObject::Object { name: root };
|
||||||
|
for component in path.into_iter() {
|
||||||
|
last_named_object = match (component, last_named_object) {
|
||||||
|
(PathComponent::MapKey(name), _) => LastNamedObject::Object { name },
|
||||||
|
(PathComponent::ArrayIndex(index), LastNamedObject::Object { name }) => {
|
||||||
|
LastNamedObject::ArrayInsideObject { object_name: name, index: *index }
|
||||||
|
}
|
||||||
|
(
|
||||||
|
PathComponent::ArrayIndex(index),
|
||||||
|
LastNamedObject::ArrayInsideObject { object_name, index: _ },
|
||||||
|
) => LastNamedObject::NestedArrayInsideObject {
|
||||||
|
object_name,
|
||||||
|
index: *index,
|
||||||
|
nesting_level: 0,
|
||||||
|
},
|
||||||
|
(
|
||||||
|
PathComponent::ArrayIndex(index),
|
||||||
|
LastNamedObject::NestedArrayInsideObject { object_name, index: _, nesting_level },
|
||||||
|
) => LastNamedObject::NestedArrayInsideObject {
|
||||||
|
object_name,
|
||||||
|
index: *index,
|
||||||
|
nesting_level: nesting_level.saturating_add(1),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
last_named_object
|
||||||
|
}
|
||||||
|
|
||||||
|
/// How to refer to a value in an error message, relative to the closest value that has a name.
#[derive(Debug, Clone, Copy)]
enum LastNamedObject<'a> {
    /// The value is itself named `name`.
    Object { name: &'a str },
    /// The value is item `index` of the array named `object_name`.
    ArrayInsideObject { object_name: &'a str, index: usize },
    /// The value is item `index` of an array nested inside the array named `object_name`.
    ///
    /// `nesting_level` is `0` for an array directly inside the named array, and grows by one
    /// for each additional level of nesting.
    NestedArrayInsideObject { object_name: &'a str, index: usize, nesting_level: usize },
}

impl<'a> std::fmt::Display for LastNamedObject<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            LastNamedObject::Object { name } => write!(f, "`{name}`"),
            LastNamedObject::ArrayInsideObject { object_name, index } => {
                write!(f, "item #{index} inside `{object_name}`")
            }
            LastNamedObject::NestedArrayInsideObject { object_name, index, nesting_level: 0 } => {
                write!(f, "item #{index} inside nested array in `{object_name}`")
            }
            LastNamedObject::NestedArrayInsideObject { object_name, index, nesting_level } => {
                // The level is built with `saturating_add`, so it may sit at `usize::MAX`:
                // saturate here as well rather than overflowing.
                write!(
                    f,
                    "item #{index} inside nested array ({} levels of nesting) in `{object_name}`",
                    nesting_level.saturating_add(1)
                )
            }
        }
    }
}
|
||||||
|
|
||||||
|
/// Builds a string representation of a path, preprending the name of the root value.
|
||||||
|
pub fn path_with_root<'a>(
|
||||||
|
root: &str,
|
||||||
|
path: impl IntoIterator<Item = &'a PathComponent> + 'a,
|
||||||
|
) -> String {
|
||||||
|
use std::fmt::Write as _;
|
||||||
|
let mut res = format!("`{root}");
|
||||||
|
for component in path.into_iter() {
|
||||||
|
match component {
|
||||||
|
PathComponent::MapKey(key) => {
|
||||||
|
let _ = write!(&mut res, ".{key}");
|
||||||
|
}
|
||||||
|
PathComponent::ArrayIndex(index) => {
|
||||||
|
let _ = write!(&mut res, "[{index}]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
res.push('`');
|
||||||
|
res
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The operation that was running when an extraction failure happened.
#[derive(Debug, Clone, Copy)]
pub enum ExtractionErrorContext {
    /// A value was being extracted from a single location
    ExtractingSingleValue,
    /// The array of values was being located
    FindingPathToArray,
    /// A value was being extracted from the array item at the given index
    ExtractingArrayItem(usize),
}
|
||||||
|
|
||||||
|
/// Kind of errors that can happen during extraction
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum ExtractionErrorKind {
|
||||||
|
/// An expected path component is missing
|
||||||
|
MissingPathComponent {
|
||||||
|
/// Index of the missing component in the path
|
||||||
|
missing_index: usize,
|
||||||
|
/// Path where a component is missing
|
||||||
|
path: ValuePath,
|
||||||
|
/// Possible matching key in object
|
||||||
|
key_suggestion: Option<String>,
|
||||||
|
},
|
||||||
|
/// An expected path component cannot be found because its container is the wrong type
|
||||||
|
WrongPathComponent {
|
||||||
|
/// String representation of the wrong component
|
||||||
|
wrong_component: String,
|
||||||
|
/// Index of the wrong component in the path
|
||||||
|
index: usize,
|
||||||
|
/// Path where a component has the wrong type
|
||||||
|
path: ValuePath,
|
||||||
|
},
|
||||||
|
/// Could not deserialize an extracted value to its requested type
|
||||||
|
DeserializationError {
|
||||||
|
/// inner deserialization error
|
||||||
|
error: serde_json::Error,
|
||||||
|
/// path to extracted value
|
||||||
|
path: ValuePath,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
enum ArrayParsingContext<'a> {
|
||||||
|
Nested,
|
||||||
|
NotNested(&'a mut Option<ArrayPath>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ValueTemplate {
|
||||||
|
/// Prepare a template for injection or extraction.
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
///
|
||||||
|
/// - `template`: JSON value that acts a template. Its placeholder values will be replaced by actual values during injection,
|
||||||
|
/// and actual values will be recovered from their location during extraction.
|
||||||
|
/// - `placeholder_string`: Value that a JSON string should assume to act as a placeholder value that can be injected into or
|
||||||
|
/// extracted from.
|
||||||
|
/// - `repeat_string`: Sentinel value that can be placed as the second value in an array to indicate that the first value can be repeated
|
||||||
|
/// any number of times. The first value should contain exactly one placeholder string.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// - [`TemplateParsingError`]: refer to the documentation of this type
|
||||||
|
pub fn new(
|
||||||
|
template: Value,
|
||||||
|
placeholder_string: &str,
|
||||||
|
repeat_string: &str,
|
||||||
|
) -> Result<Self, TemplateParsingError> {
|
||||||
|
let mut value_path = None;
|
||||||
|
let mut array_path = None;
|
||||||
|
let mut current_path = Vec::new();
|
||||||
|
Self::parse_value(
|
||||||
|
&template,
|
||||||
|
placeholder_string,
|
||||||
|
repeat_string,
|
||||||
|
&mut value_path,
|
||||||
|
&mut ArrayParsingContext::NotNested(&mut array_path),
|
||||||
|
&mut current_path,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let value_kind = match (array_path, value_path) {
|
||||||
|
(None, None) => return Err(TemplateParsingError::MissingPlaceholderString),
|
||||||
|
(None, Some(value_path)) => ValueKind::Single(value_path),
|
||||||
|
(Some(array_path), None) => ValueKind::Array(array_path),
|
||||||
|
(Some(array_path), Some(value_path)) => {
|
||||||
|
return Err(TemplateParsingError::BothArrayAndSingle {
|
||||||
|
single_path: value_path,
|
||||||
|
path_to_array: array_path.path_to_array,
|
||||||
|
array_to_placeholder: array_path.value_path_in_array,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Self { template, value_kind })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether there is a placeholder that can be repeated.
|
||||||
|
///
|
||||||
|
/// - During injection, all values are injected in the array placeholder,
|
||||||
|
/// - During extraction, all repeatable placeholders are extracted from the array.
|
||||||
|
pub fn has_array_value(&self) -> bool {
|
||||||
|
matches!(self.value_kind, ValueKind::Array(_))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Render a value from the template and context values.
|
||||||
|
///
|
||||||
|
/// # Error
|
||||||
|
///
|
||||||
|
/// - [`MissingValue`]: if the number of injected values is 0.
|
||||||
|
pub fn inject(&self, values: impl IntoIterator<Item = Value>) -> Result<Value, MissingValue> {
|
||||||
|
let mut rendered = self.template.clone();
|
||||||
|
let mut values = values.into_iter();
|
||||||
|
|
||||||
|
match &self.value_kind {
|
||||||
|
ValueKind::Single(injection_path) => {
|
||||||
|
let Some(injected_value) = values.next() else { return Err(MissingValue) };
|
||||||
|
inject_value(&mut rendered, injection_path, injected_value);
|
||||||
|
}
|
||||||
|
ValueKind::Array(ArrayPath { repeated_value, path_to_array, value_path_in_array }) => {
|
||||||
|
// 1. build the array of repeated values
|
||||||
|
let mut array = Vec::new();
|
||||||
|
for injected_value in values {
|
||||||
|
let mut repeated_value = repeated_value.clone();
|
||||||
|
inject_value(&mut repeated_value, value_path_in_array, injected_value);
|
||||||
|
array.push(repeated_value);
|
||||||
|
}
|
||||||
|
|
||||||
|
if array.is_empty() {
|
||||||
|
return Err(MissingValue);
|
||||||
|
}
|
||||||
|
// 2. inject at the injection point in the rendered value
|
||||||
|
inject_value(&mut rendered, path_to_array, Value::Array(array));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(rendered)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract sub values from the template and a value.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// - if a single placeholder is missing.
|
||||||
|
/// - if there is no value corresponding to an array placeholder
|
||||||
|
/// - if the value corresponding to an array placeholder is not an array
|
||||||
|
pub fn extract<T>(&self, mut value: Value) -> Result<Vec<T>, ExtractionError>
|
||||||
|
where
|
||||||
|
T: for<'de> Deserialize<'de>,
|
||||||
|
{
|
||||||
|
Ok(match &self.value_kind {
|
||||||
|
ValueKind::Single(extraction_path) => {
|
||||||
|
let extracted_value =
|
||||||
|
extract_value(extraction_path, &mut value).with_context(|kind| {
|
||||||
|
ExtractionError {
|
||||||
|
kind,
|
||||||
|
context: ExtractionErrorContext::ExtractingSingleValue,
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
vec![extracted_value]
|
||||||
|
}
|
||||||
|
ValueKind::Array(ArrayPath {
|
||||||
|
repeated_value: _,
|
||||||
|
path_to_array,
|
||||||
|
value_path_in_array,
|
||||||
|
}) => {
|
||||||
|
// get the array
|
||||||
|
let array = extract_value(path_to_array, &mut value).with_context(|kind| {
|
||||||
|
ExtractionError { kind, context: ExtractionErrorContext::FindingPathToArray }
|
||||||
|
})?;
|
||||||
|
let array = match array {
|
||||||
|
Value::Array(array) => array,
|
||||||
|
not_array => {
|
||||||
|
let mut path = path_to_array.clone();
|
||||||
|
path.push(PathComponent::ArrayIndex(0));
|
||||||
|
return Err(ExtractionError {
|
||||||
|
kind: ExtractionErrorKind::WrongPathComponent {
|
||||||
|
wrong_component: format_value(¬_array),
|
||||||
|
index: path_to_array.len(),
|
||||||
|
path,
|
||||||
|
},
|
||||||
|
context: ExtractionErrorContext::FindingPathToArray,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let mut extracted_values = Vec::with_capacity(array.len());
|
||||||
|
|
||||||
|
for (index, mut item) in array.into_iter().enumerate() {
|
||||||
|
let extracted_value = extract_value(value_path_in_array, &mut item)
|
||||||
|
.with_context(|kind| ExtractionError {
|
||||||
|
kind,
|
||||||
|
context: ExtractionErrorContext::ExtractingArrayItem(index),
|
||||||
|
})?;
|
||||||
|
extracted_values.push(extracted_value);
|
||||||
|
}
|
||||||
|
|
||||||
|
extracted_values
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_array(
|
||||||
|
array: &[Value],
|
||||||
|
placeholder_string: &str,
|
||||||
|
repeat_string: &str,
|
||||||
|
value_path: &mut Option<ValuePath>,
|
||||||
|
mut array_path: &mut ArrayParsingContext,
|
||||||
|
current_path: &mut ValuePath,
|
||||||
|
) -> Result<(), TemplateParsingError> {
|
||||||
|
// two modes for parsing array.
|
||||||
|
match array {
|
||||||
|
// 1. array contains a repeat string in second position
|
||||||
|
[first, second, rest @ ..] if second == repeat_string => {
|
||||||
|
let ArrayParsingContext::NotNested(array_path) = &mut array_path else {
|
||||||
|
return Err(TemplateParsingError::NestedRepeatString(current_path.clone()));
|
||||||
|
};
|
||||||
|
if let Some(array_path) = array_path {
|
||||||
|
return Err(TemplateParsingError::MultipleRepeatString(
|
||||||
|
current_path.clone(),
|
||||||
|
array_path.path_to_array.clone(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
if first == repeat_string {
|
||||||
|
return Err(TemplateParsingError::BadIndexForRepeatString(
|
||||||
|
current_path.clone(),
|
||||||
|
0,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
if let Some(position) = rest.iter().position(|value| value == repeat_string) {
|
||||||
|
let position = position + 2;
|
||||||
|
return Err(TemplateParsingError::BadIndexForRepeatString(
|
||||||
|
current_path.clone(),
|
||||||
|
position,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let value_path_in_array = {
|
||||||
|
let mut value_path = None;
|
||||||
|
let mut current_path_in_array = Vec::new();
|
||||||
|
|
||||||
|
Self::parse_value(
|
||||||
|
first,
|
||||||
|
placeholder_string,
|
||||||
|
repeat_string,
|
||||||
|
&mut value_path,
|
||||||
|
&mut ArrayParsingContext::Nested,
|
||||||
|
&mut current_path_in_array,
|
||||||
|
)
|
||||||
|
.map_err(|error| error.prepend_path(current_path.to_vec()))?;
|
||||||
|
|
||||||
|
value_path.ok_or_else(|| {
|
||||||
|
let mut repeated_value_path = current_path.clone();
|
||||||
|
repeated_value_path.push(PathComponent::ArrayIndex(0));
|
||||||
|
TemplateParsingError::MissingPlaceholderInRepeatedValue(repeated_value_path)
|
||||||
|
})?
|
||||||
|
};
|
||||||
|
**array_path = Some(ArrayPath {
|
||||||
|
repeated_value: first.to_owned(),
|
||||||
|
path_to_array: current_path.clone(),
|
||||||
|
value_path_in_array,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// 2. array does not contain a repeat string
|
||||||
|
array => {
|
||||||
|
if let Some(position) = array.iter().position(|value| value == repeat_string) {
|
||||||
|
return Err(TemplateParsingError::BadIndexForRepeatString(
|
||||||
|
current_path.clone(),
|
||||||
|
position,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
for (index, value) in array.iter().enumerate() {
|
||||||
|
current_path.push(PathComponent::ArrayIndex(index));
|
||||||
|
Self::parse_value(
|
||||||
|
value,
|
||||||
|
placeholder_string,
|
||||||
|
repeat_string,
|
||||||
|
value_path,
|
||||||
|
array_path,
|
||||||
|
current_path,
|
||||||
|
)?;
|
||||||
|
current_path.pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_object(
|
||||||
|
object: &Map<String, Value>,
|
||||||
|
placeholder_string: &str,
|
||||||
|
repeat_string: &str,
|
||||||
|
value_path: &mut Option<ValuePath>,
|
||||||
|
array_path: &mut ArrayParsingContext,
|
||||||
|
current_path: &mut ValuePath,
|
||||||
|
) -> Result<(), TemplateParsingError> {
|
||||||
|
for (key, value) in object.iter() {
|
||||||
|
current_path.push(PathComponent::MapKey(key.to_owned()));
|
||||||
|
Self::parse_value(
|
||||||
|
value,
|
||||||
|
placeholder_string,
|
||||||
|
repeat_string,
|
||||||
|
value_path,
|
||||||
|
array_path,
|
||||||
|
current_path,
|
||||||
|
)?;
|
||||||
|
current_path.pop();
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_value(
|
||||||
|
value: &Value,
|
||||||
|
placeholder_string: &str,
|
||||||
|
repeat_string: &str,
|
||||||
|
value_path: &mut Option<ValuePath>,
|
||||||
|
array_path: &mut ArrayParsingContext,
|
||||||
|
current_path: &mut ValuePath,
|
||||||
|
) -> Result<(), TemplateParsingError> {
|
||||||
|
match value {
|
||||||
|
Value::String(str) => {
|
||||||
|
if placeholder_string == str {
|
||||||
|
if let Some(value_path) = value_path {
|
||||||
|
return Err(TemplateParsingError::MultiplePlaceholderString(
|
||||||
|
current_path.clone(),
|
||||||
|
value_path.clone(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
*value_path = Some(current_path.clone());
|
||||||
|
}
|
||||||
|
if repeat_string == str {
|
||||||
|
return Err(TemplateParsingError::RepeatStringNotInArray(current_path.clone()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Value::Null | Value::Bool(_) | Value::Number(_) => {}
|
||||||
|
Value::Array(array) => Self::parse_array(
|
||||||
|
array,
|
||||||
|
placeholder_string,
|
||||||
|
repeat_string,
|
||||||
|
value_path,
|
||||||
|
array_path,
|
||||||
|
current_path,
|
||||||
|
)?,
|
||||||
|
Value::Object(object) => Self::parse_object(
|
||||||
|
object,
|
||||||
|
placeholder_string,
|
||||||
|
repeat_string,
|
||||||
|
value_path,
|
||||||
|
array_path,
|
||||||
|
current_path,
|
||||||
|
)?,
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn inject_value(rendered: &mut Value, injection_path: &Vec<PathComponent>, injected_value: Value) {
|
||||||
|
let mut current_value = rendered;
|
||||||
|
for injection_component in injection_path {
|
||||||
|
current_value = match injection_component {
|
||||||
|
PathComponent::MapKey(key) => current_value.get_mut(key).unwrap(),
|
||||||
|
PathComponent::ArrayIndex(index) => current_value.get_mut(index).unwrap(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*current_value = injected_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn format_value(value: &Value) -> String {
|
||||||
|
match value {
|
||||||
|
Value::Array(array) => format!("an array of size {}", array.len()),
|
||||||
|
Value::Object(object) => {
|
||||||
|
format!("an object with {} field(s)", object.len())
|
||||||
|
}
|
||||||
|
value => value.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_value<T>(
|
||||||
|
extraction_path: &[PathComponent],
|
||||||
|
initial_value: &mut Value,
|
||||||
|
) -> Result<T, ExtractionErrorKind>
|
||||||
|
where
|
||||||
|
T: for<'de> Deserialize<'de>,
|
||||||
|
{
|
||||||
|
let mut current_value = initial_value;
|
||||||
|
for (path_index, extraction_component) in extraction_path.iter().enumerate() {
|
||||||
|
current_value = {
|
||||||
|
match extraction_component {
|
||||||
|
PathComponent::MapKey(key) => {
|
||||||
|
if !current_value.is_object() {
|
||||||
|
return Err(ExtractionErrorKind::WrongPathComponent {
|
||||||
|
wrong_component: format_value(current_value),
|
||||||
|
index: path_index,
|
||||||
|
path: extraction_path.to_vec(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if let Some(object) = current_value.as_object_mut() {
|
||||||
|
if !object.contains_key(key) {
|
||||||
|
let typos =
|
||||||
|
levenshtein_automata::LevenshteinAutomatonBuilder::new(2, true)
|
||||||
|
.build_dfa(key);
|
||||||
|
let mut key_suggestion = None;
|
||||||
|
'check_typos: for (key, _) in object.iter() {
|
||||||
|
match typos.eval(key) {
|
||||||
|
levenshtein_automata::Distance::Exact(0) => { /* ??? */ }
|
||||||
|
levenshtein_automata::Distance::Exact(_) => {
|
||||||
|
key_suggestion = Some(key.to_owned());
|
||||||
|
break 'check_typos;
|
||||||
|
}
|
||||||
|
levenshtein_automata::Distance::AtLeast(_) => continue,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Err(ExtractionErrorKind::MissingPathComponent {
|
||||||
|
missing_index: path_index,
|
||||||
|
path: extraction_path.to_vec(),
|
||||||
|
key_suggestion,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if let Some(value) = object.get_mut(key) {
|
||||||
|
value
|
||||||
|
} else {
|
||||||
|
// borrow checking limit: the borrow checker cannot be convinced that `object` is no longer mutably borrowed on the
|
||||||
|
// `else` branch of the `if let`, so we cannot return MissingPathComponent here.
|
||||||
|
// As a workaround, we checked that the object does not contain the key above, making this `else` unreachable.
|
||||||
|
unreachable!()
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// borrow checking limit: the borrow checker cannot be convinced that `current_value` is no longer mutably borrowed
|
||||||
|
// on the `else` branch of the `if let`, so we cannot return WrongPathComponent here.
|
||||||
|
// As a workaround, we checked that the value was not a map above, making this `else` unreachable.
|
||||||
|
unreachable!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PathComponent::ArrayIndex(index) => {
|
||||||
|
if !current_value.is_array() {
|
||||||
|
return Err(ExtractionErrorKind::WrongPathComponent {
|
||||||
|
wrong_component: format_value(current_value),
|
||||||
|
index: path_index,
|
||||||
|
path: extraction_path.to_vec(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
match current_value.get_mut(index) {
|
||||||
|
Some(value) => value,
|
||||||
|
None => {
|
||||||
|
return Err(ExtractionErrorKind::MissingPathComponent {
|
||||||
|
missing_index: path_index,
|
||||||
|
path: extraction_path.to_vec(),
|
||||||
|
key_suggestion: None,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
serde_json::from_value(current_value.take()).map_err(|error| {
|
||||||
|
ExtractionErrorKind::DeserializationError { error, path: extraction_path.to_vec() }
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
trait ExtractionResultErrorContext<T> {
|
||||||
|
fn with_context<F>(self, f: F) -> Result<T, ExtractionError>
|
||||||
|
where
|
||||||
|
F: FnOnce(ExtractionErrorKind) -> ExtractionError;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> ExtractionResultErrorContext<T> for Result<T, ExtractionErrorKind> {
|
||||||
|
fn with_context<F>(self, f: F) -> Result<T, ExtractionError>
|
||||||
|
where
|
||||||
|
F: FnOnce(ExtractionErrorKind) -> ExtractionError,
|
||||||
|
{
|
||||||
|
match self {
|
||||||
|
Ok(t) => Ok(t),
|
||||||
|
Err(kind) => Err(f(kind)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use serde_json::{json, Value};

    use super::{PathComponent, TemplateParsingError, ValueTemplate};

    /// Parses `template` with the placeholder and repeat strings shared by all tests.
    fn new_template(template: Value) -> Result<ValueTemplate, TemplateParsingError> {
        ValueTemplate::new(template, "{{text}}", "{{..}}")
    }

    /// A template without any placeholder is rejected.
    #[test]
    fn empty_template() {
        let parsed = new_template(json!({
            "toto": "no template at all",
            "titi": ["this", "will", "not", "work"],
            "tutu": null
        }));

        let error = parsed.unwrap_err();
        assert!(matches!(error, TemplateParsingError::MissingPlaceholderString))
    }

    /// A single placeholder receives the first injected value only.
    #[test]
    fn single_template() {
        let basic = new_template(json!({
            "toto": "text",
            "titi": ["this", "will", "still", "{{text}}"],
            "tutu": null
        }))
        .unwrap();

        assert!(!basic.has_array_value());

        let rendered = basic.inject(vec!["work".into(), Value::Null, "test".into()]).unwrap();
        assert_eq!(
            rendered,
            json!({
                "toto": "text",
                "titi": ["this", "will", "still", "work"],
                "tutu": null
            })
        );
    }

    /// A second placeholder is rejected, and both locations are reported.
    #[test]
    fn too_many_placeholders() {
        let parsed = new_template(json!({
            "toto": "{{text}}",
            "titi": ["this", "will", "still", "{{text}}"],
            "tutu": "text"
        }));

        let Err(TemplateParsingError::MultiplePlaceholderString(left, right)) = parsed else {
            panic!("should error")
        };

        assert_eq!(left, vec![PathComponent::MapKey("titi".into()), PathComponent::ArrayIndex(3)]);
        assert_eq!(right, vec![PathComponent::MapKey("toto".into())])
    }

    /// A repeated placeholder is rendered once per injected value, and extraction recovers
    /// the injected values.
    #[test]
    fn dynamic_template() {
        let basic = new_template(json!({
            "toto": "text",
            "titi": [{
                "type": "text",
                "data": "{{text}}"
            }, "{{..}}"],
            "tutu": null
        }))
        .unwrap();

        assert!(basic.has_array_value());

        let injected_values: Vec<Value> = vec![
            "work".into(),
            Value::Null,
            42.into(),
            "test".into(),
            "tata".into(),
            "titi".into(),
            "tutu".into(),
        ];

        let rendered = basic.inject(injected_values.clone()).unwrap();

        let expected = json!({
            "toto": "text",
            "titi": [
                { "type": "text", "data": "work" },
                { "type": "text", "data": Value::Null },
                { "type": "text", "data": 42 },
                { "type": "text", "data": "test" },
                { "type": "text", "data": "tata" },
                { "type": "text", "data": "titi" },
                { "type": "text", "data": "tutu" }
            ],
            "tutu": null
        });
        assert_eq!(rendered, expected);

        let extracted_values: Vec<Value> = basic.extract(rendered).unwrap();
        assert_eq!(extracted_values, injected_values);
    }
}
|
@ -11,6 +11,7 @@ use crate::ThreadPoolNoAbort;
|
|||||||
|
|
||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod hf;
|
pub mod hf;
|
||||||
|
pub mod json_template;
|
||||||
pub mod manual;
|
pub mod manual;
|
||||||
pub mod openai;
|
pub mod openai;
|
||||||
pub mod parsed_vectors;
|
pub mod parsed_vectors;
|
||||||
@ -227,7 +228,9 @@ impl Embedder {
|
|||||||
EmbedderOptions::UserProvided(options) => {
|
EmbedderOptions::UserProvided(options) => {
|
||||||
Self::UserProvided(manual::Embedder::new(options))
|
Self::UserProvided(manual::Embedder::new(options))
|
||||||
}
|
}
|
||||||
EmbedderOptions::Rest(options) => Self::Rest(rest::Embedder::new(options)?),
|
EmbedderOptions::Rest(options) => {
|
||||||
|
Self::Rest(rest::Embedder::new(options, rest::ConfigurationSource::User)?)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,19 +28,22 @@ impl EmbedderOptions {
|
|||||||
impl Embedder {
|
impl Embedder {
|
||||||
pub fn new(options: EmbedderOptions) -> Result<Self, NewEmbedderError> {
|
pub fn new(options: EmbedderOptions) -> Result<Self, NewEmbedderError> {
|
||||||
let model = options.embedding_model.as_str();
|
let model = options.embedding_model.as_str();
|
||||||
let rest_embedder = match RestEmbedder::new(RestEmbedderOptions {
|
let rest_embedder = match RestEmbedder::new(
|
||||||
|
RestEmbedderOptions {
|
||||||
api_key: options.api_key,
|
api_key: options.api_key,
|
||||||
dimensions: None,
|
dimensions: None,
|
||||||
distribution: options.distribution,
|
distribution: options.distribution,
|
||||||
url: options.url.unwrap_or_else(get_ollama_path),
|
url: options.url.unwrap_or_else(get_ollama_path),
|
||||||
query: serde_json::json!({
|
request: serde_json::json!({
|
||||||
"model": model,
|
"model": model,
|
||||||
|
"prompt": super::rest::REQUEST_PLACEHOLDER,
|
||||||
}),
|
}),
|
||||||
input_field: vec!["prompt".to_owned()],
|
response: serde_json::json!({
|
||||||
path_to_embeddings: Default::default(),
|
"embedding": super::rest::RESPONSE_PLACEHOLDER,
|
||||||
embedding_object: vec!["embedding".to_owned()],
|
}),
|
||||||
input_type: super::rest::InputType::Text,
|
},
|
||||||
}) {
|
super::rest::ConfigurationSource::Ollama,
|
||||||
|
) {
|
||||||
Ok(embedder) => embedder,
|
Ok(embedder) => embedder,
|
||||||
Err(NewEmbedderError {
|
Err(NewEmbedderError {
|
||||||
kind:
|
kind:
|
||||||
|
@ -26,20 +26,21 @@ impl EmbedderOptions {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn query(&self) -> serde_json::Value {
|
pub fn request(&self) -> serde_json::Value {
|
||||||
let model = self.embedding_model.name();
|
let model = self.embedding_model.name();
|
||||||
|
|
||||||
let mut query = serde_json::json!({
|
let mut request = serde_json::json!({
|
||||||
"model": model,
|
"model": model,
|
||||||
|
"input": [super::rest::REQUEST_PLACEHOLDER, super::rest::REPEAT_PLACEHOLDER]
|
||||||
});
|
});
|
||||||
|
|
||||||
if self.embedding_model.supports_overriding_dimensions() {
|
if self.embedding_model.supports_overriding_dimensions() {
|
||||||
if let Some(dimensions) = self.dimensions {
|
if let Some(dimensions) = self.dimensions {
|
||||||
query["dimensions"] = dimensions.into();
|
request["dimensions"] = dimensions.into();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
query
|
request
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn distribution(&self) -> Option<DistributionShift> {
|
pub fn distribution(&self) -> Option<DistributionShift> {
|
||||||
@ -180,17 +181,23 @@ impl Embedder {
|
|||||||
|
|
||||||
let url = options.url.as_deref().unwrap_or(OPENAI_EMBEDDINGS_URL).to_owned();
|
let url = options.url.as_deref().unwrap_or(OPENAI_EMBEDDINGS_URL).to_owned();
|
||||||
|
|
||||||
let rest_embedder = RestEmbedder::new(RestEmbedderOptions {
|
let rest_embedder = RestEmbedder::new(
|
||||||
|
RestEmbedderOptions {
|
||||||
api_key: Some(api_key.clone()),
|
api_key: Some(api_key.clone()),
|
||||||
distribution: None,
|
distribution: None,
|
||||||
dimensions: Some(options.dimensions()),
|
dimensions: Some(options.dimensions()),
|
||||||
url,
|
url,
|
||||||
query: options.query(),
|
request: options.request(),
|
||||||
input_field: vec!["input".to_owned()],
|
response: serde_json::json!({
|
||||||
input_type: crate::vector::rest::InputType::TextArray,
|
"data": [{
|
||||||
path_to_embeddings: vec!["data".to_owned()],
|
"embedding": super::rest::RESPONSE_PLACEHOLDER
|
||||||
embedding_object: vec!["embedding".to_owned()],
|
},
|
||||||
})?;
|
super::rest::REPEAT_PLACEHOLDER
|
||||||
|
]
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
super::rest::ConfigurationSource::OpenAi,
|
||||||
|
)?;
|
||||||
|
|
||||||
// looking at the code it is very unclear that this can actually fail.
|
// looking at the code it is very unclear that this can actually fail.
|
||||||
let tokenizer = tiktoken_rs::cl100k_base().unwrap();
|
let tokenizer = tiktoken_rs::cl100k_base().unwrap();
|
||||||
@ -201,7 +208,7 @@ impl Embedder {
|
|||||||
pub fn embed(&self, texts: Vec<String>) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
pub fn embed(&self, texts: Vec<String>) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
||||||
match self.rest_embedder.embed_ref(&texts) {
|
match self.rest_embedder.embed_ref(&texts) {
|
||||||
Ok(embeddings) => Ok(embeddings),
|
Ok(embeddings) => Ok(embeddings),
|
||||||
Err(EmbedError { kind: EmbedErrorKind::RestBadRequest(error), fault: _ }) => {
|
Err(EmbedError { kind: EmbedErrorKind::RestBadRequest(error, _), fault: _ }) => {
|
||||||
tracing::warn!(error=?error, "OpenAI: received `BAD_REQUEST`. Input was maybe too long, retrying on tokenized version. For best performance, limit the size of your document template.");
|
tracing::warn!(error=?error, "OpenAI: received `BAD_REQUEST`. Input was maybe too long, retrying on tokenized version. For best performance, limit the size of your document template.");
|
||||||
self.try_embed_tokenized(&texts)
|
self.try_embed_tokenized(&texts)
|
||||||
}
|
}
|
||||||
@ -225,7 +232,7 @@ impl Embedder {
|
|||||||
|
|
||||||
let embedding = self.rest_embedder.embed_tokens(tokens)?;
|
let embedding = self.rest_embedder.embed_tokens(tokens)?;
|
||||||
embeddings_for_prompt.append(embedding.into_inner()).map_err(|got| {
|
embeddings_for_prompt.append(embedding.into_inner()).map_err(|got| {
|
||||||
EmbedError::openai_unexpected_dimension(self.dimensions(), got.len())
|
EmbedError::rest_unexpected_dimension(self.dimensions(), got.len())
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
all_embeddings.push(embeddings_for_prompt);
|
all_embeddings.push(embeddings_for_prompt);
|
||||||
|
@ -4,6 +4,7 @@ use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
|
|||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use super::error::EmbedErrorKind;
|
use super::error::EmbedErrorKind;
|
||||||
|
use super::json_template::ValueTemplate;
|
||||||
use super::{
|
use super::{
|
||||||
DistributionShift, EmbedError, Embedding, Embeddings, NewEmbedderError, REQUEST_PARALLELISM,
|
DistributionShift, EmbedError, Embedding, Embeddings, NewEmbedderError, REQUEST_PARALLELISM,
|
||||||
};
|
};
|
||||||
@ -11,12 +12,18 @@ use crate::error::FaultSource;
|
|||||||
use crate::ThreadPoolNoAbort;
|
use crate::ThreadPoolNoAbort;
|
||||||
|
|
||||||
// retrying in case of failure
|
// retrying in case of failure
|
||||||
|
|
||||||
pub struct Retry {
|
pub struct Retry {
|
||||||
pub error: EmbedError,
|
pub error: EmbedError,
|
||||||
strategy: RetryStrategy,
|
strategy: RetryStrategy,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Identifies who produced the configuration of a REST embedder.
///
/// The value is chosen by the code constructing the embedder, stored with the
/// request data, and handed to the `400 Bad Request` error built in
/// `check_response`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConfigurationSource {
    /// The configuration was generated by the OpenAI embedder.
    OpenAi,
    /// The configuration was generated by the Ollama embedder.
    Ollama,
    /// The configuration comes from `EmbedderOptions::Rest`, i.e. a REST
    /// embedder declared directly in the settings.
    User,
}
|
||||||
|
|
||||||
pub enum RetryStrategy {
|
pub enum RetryStrategy {
|
||||||
GiveUp,
|
GiveUp,
|
||||||
Retry,
|
Retry,
|
||||||
@ -63,10 +70,20 @@ impl Retry {
|
|||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Embedder {
|
pub struct Embedder {
|
||||||
client: ureq::Agent,
|
data: EmbedderData,
|
||||||
options: EmbedderOptions,
|
|
||||||
bearer: Option<String>,
|
|
||||||
dimensions: usize,
|
dimensions: usize,
|
||||||
|
distribution: Option<DistributionShift>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// All data needed to perform requests and parse responses
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct EmbedderData {
|
||||||
|
client: ureq::Agent,
|
||||||
|
bearer: Option<String>,
|
||||||
|
url: String,
|
||||||
|
request: Request,
|
||||||
|
response: Response,
|
||||||
|
configuration_source: ConfigurationSource,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
|
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
|
||||||
@ -75,29 +92,8 @@ pub struct EmbedderOptions {
|
|||||||
pub distribution: Option<DistributionShift>,
|
pub distribution: Option<DistributionShift>,
|
||||||
pub dimensions: Option<usize>,
|
pub dimensions: Option<usize>,
|
||||||
pub url: String,
|
pub url: String,
|
||||||
pub query: serde_json::Value,
|
pub request: serde_json::Value,
|
||||||
pub input_field: Vec<String>,
|
pub response: serde_json::Value,
|
||||||
// path to the array of embeddings
|
|
||||||
pub path_to_embeddings: Vec<String>,
|
|
||||||
// shape of a single embedding
|
|
||||||
pub embedding_object: Vec<String>,
|
|
||||||
pub input_type: InputType,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for EmbedderOptions {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self {
|
|
||||||
url: Default::default(),
|
|
||||||
query: Default::default(),
|
|
||||||
input_field: vec!["input".into()],
|
|
||||||
path_to_embeddings: vec!["data".into()],
|
|
||||||
embedding_object: vec!["embedding".into()],
|
|
||||||
input_type: InputType::Text,
|
|
||||||
api_key: None,
|
|
||||||
distribution: None,
|
|
||||||
dimensions: None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::hash::Hash for EmbedderOptions {
|
impl std::hash::Hash for EmbedderOptions {
|
||||||
@ -106,26 +102,25 @@ impl std::hash::Hash for EmbedderOptions {
|
|||||||
self.distribution.hash(state);
|
self.distribution.hash(state);
|
||||||
self.dimensions.hash(state);
|
self.dimensions.hash(state);
|
||||||
self.url.hash(state);
|
self.url.hash(state);
|
||||||
// skip hashing the query
|
// skip hashing the request and response
|
||||||
// collisions in regular usage should be minimal,
|
// collisions in regular usage should be minimal,
|
||||||
// and the list is limited to 256 values anyway
|
// and the list is limited to 256 values anyway
|
||||||
self.input_field.hash(state);
|
|
||||||
self.path_to_embeddings.hash(state);
|
|
||||||
self.embedding_object.hash(state);
|
|
||||||
self.input_type.hash(state);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, Deserr)]
|
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, Deserr)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
#[deserr(rename_all = camelCase, deny_unknown_fields)]
|
#[deserr(rename_all = camelCase, deny_unknown_fields)]
|
||||||
pub enum InputType {
|
enum InputType {
|
||||||
Text,
|
Text,
|
||||||
TextArray,
|
TextArray,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Embedder {
|
impl Embedder {
|
||||||
pub fn new(options: EmbedderOptions) -> Result<Self, NewEmbedderError> {
|
pub fn new(
|
||||||
|
options: EmbedderOptions,
|
||||||
|
configuration_source: ConfigurationSource,
|
||||||
|
) -> Result<Self, NewEmbedderError> {
|
||||||
let bearer = options.api_key.as_deref().map(|api_key| format!("Bearer {api_key}"));
|
let bearer = options.api_key.as_deref().map(|api_key| format!("Bearer {api_key}"));
|
||||||
|
|
||||||
let client = ureq::AgentBuilder::new()
|
let client = ureq::AgentBuilder::new()
|
||||||
@ -133,28 +128,40 @@ impl Embedder {
|
|||||||
.max_idle_connections_per_host(REQUEST_PARALLELISM * 2)
|
.max_idle_connections_per_host(REQUEST_PARALLELISM * 2)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
let request = Request::new(options.request)?;
|
||||||
|
let response = Response::new(options.response, &request)?;
|
||||||
|
|
||||||
|
let data = EmbedderData {
|
||||||
|
client,
|
||||||
|
bearer,
|
||||||
|
url: options.url,
|
||||||
|
request,
|
||||||
|
response,
|
||||||
|
configuration_source,
|
||||||
|
};
|
||||||
|
|
||||||
let dimensions = if let Some(dimensions) = options.dimensions {
|
let dimensions = if let Some(dimensions) = options.dimensions {
|
||||||
dimensions
|
dimensions
|
||||||
} else {
|
} else {
|
||||||
infer_dimensions(&client, &options, bearer.as_deref())?
|
infer_dimensions(&data)?
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(Self { client, dimensions, options, bearer })
|
Ok(Self { data, dimensions, distribution: options.distribution })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn embed(&self, texts: Vec<String>) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
pub fn embed(&self, texts: Vec<String>) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
||||||
embed(&self.client, &self.options, self.bearer.as_deref(), texts.as_slice(), texts.len())
|
embed(&self.data, texts.as_slice(), texts.len(), Some(self.dimensions))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn embed_ref<S>(&self, texts: &[S]) -> Result<Vec<Embeddings<f32>>, EmbedError>
|
pub fn embed_ref<S>(&self, texts: &[S]) -> Result<Vec<Embeddings<f32>>, EmbedError>
|
||||||
where
|
where
|
||||||
S: AsRef<str> + Serialize,
|
S: AsRef<str> + Serialize,
|
||||||
{
|
{
|
||||||
embed(&self.client, &self.options, self.bearer.as_deref(), texts, texts.len())
|
embed(&self.data, texts, texts.len(), Some(self.dimensions))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn embed_tokens(&self, tokens: &[usize]) -> Result<Embeddings<f32>, EmbedError> {
|
pub fn embed_tokens(&self, tokens: &[usize]) -> Result<Embeddings<f32>, EmbedError> {
|
||||||
let mut embeddings = embed(&self.client, &self.options, self.bearer.as_deref(), tokens, 1)?;
|
let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions))?;
|
||||||
// unwrap: guaranteed that embeddings.len() == 1, otherwise the previous line terminated in error
|
// unwrap: guaranteed that embeddings.len() == 1, otherwise the previous line terminated in error
|
||||||
Ok(embeddings.pop().unwrap())
|
Ok(embeddings.pop().unwrap())
|
||||||
}
|
}
|
||||||
@ -179,7 +186,7 @@ impl Embedder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn prompt_count_in_chunk_hint(&self) -> usize {
|
pub fn prompt_count_in_chunk_hint(&self) -> usize {
|
||||||
match self.options.input_type {
|
match self.data.request.input_type() {
|
||||||
InputType::Text => 1,
|
InputType::Text => 1,
|
||||||
InputType::TextArray => 10,
|
InputType::TextArray => 10,
|
||||||
}
|
}
|
||||||
@ -190,87 +197,44 @@ impl Embedder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn distribution(&self) -> Option<DistributionShift> {
|
pub fn distribution(&self) -> Option<DistributionShift> {
|
||||||
self.options.distribution
|
self.distribution
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn infer_dimensions(
|
fn infer_dimensions(data: &EmbedderData) -> Result<usize, NewEmbedderError> {
|
||||||
client: &ureq::Agent,
|
let v = embed(data, ["test"].as_slice(), 1, None)
|
||||||
options: &EmbedderOptions,
|
|
||||||
bearer: Option<&str>,
|
|
||||||
) -> Result<usize, NewEmbedderError> {
|
|
||||||
let v = embed(client, options, bearer, ["test"].as_slice(), 1)
|
|
||||||
.map_err(NewEmbedderError::could_not_determine_dimension)?;
|
.map_err(NewEmbedderError::could_not_determine_dimension)?;
|
||||||
// unwrap: guaranteed that v.len() == 1, otherwise the previous line terminated in error
|
// unwrap: guaranteed that v.len() == 1, otherwise the previous line terminated in error
|
||||||
Ok(v.first().unwrap().dimension())
|
Ok(v.first().unwrap().dimension())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn embed<S>(
|
fn embed<S>(
|
||||||
client: &ureq::Agent,
|
data: &EmbedderData,
|
||||||
options: &EmbedderOptions,
|
|
||||||
bearer: Option<&str>,
|
|
||||||
inputs: &[S],
|
inputs: &[S],
|
||||||
expected_count: usize,
|
expected_count: usize,
|
||||||
|
expected_dimension: Option<usize>,
|
||||||
) -> Result<Vec<Embeddings<f32>>, EmbedError>
|
) -> Result<Vec<Embeddings<f32>>, EmbedError>
|
||||||
where
|
where
|
||||||
S: Serialize,
|
S: Serialize,
|
||||||
{
|
{
|
||||||
let request = client.post(&options.url);
|
let request = data.client.post(&data.url);
|
||||||
let request =
|
let request = if let Some(bearer) = &data.bearer {
|
||||||
if let Some(bearer) = bearer { request.set("Authorization", bearer) } else { request };
|
request.set("Authorization", bearer)
|
||||||
|
} else {
|
||||||
|
request
|
||||||
|
};
|
||||||
let request = request.set("Content-Type", "application/json");
|
let request = request.set("Content-Type", "application/json");
|
||||||
|
|
||||||
let input_value = match options.input_type {
|
let body = data.request.inject_texts(inputs);
|
||||||
InputType::Text => serde_json::json!(inputs.first()),
|
|
||||||
InputType::TextArray => serde_json::json!(inputs),
|
|
||||||
};
|
|
||||||
|
|
||||||
let body = match options.input_field.as_slice() {
|
|
||||||
[] => {
|
|
||||||
// inject input in body
|
|
||||||
input_value
|
|
||||||
}
|
|
||||||
[input] => {
|
|
||||||
let mut body = options.query.clone();
|
|
||||||
|
|
||||||
body.as_object_mut()
|
|
||||||
.ok_or_else(|| {
|
|
||||||
EmbedError::rest_not_an_object(
|
|
||||||
options.query.clone(),
|
|
||||||
options.input_field.clone(),
|
|
||||||
)
|
|
||||||
})?
|
|
||||||
.insert(input.clone(), input_value);
|
|
||||||
body
|
|
||||||
}
|
|
||||||
[path @ .., input] => {
|
|
||||||
let mut body = options.query.clone();
|
|
||||||
|
|
||||||
let mut current_value = &mut body;
|
|
||||||
for component in path {
|
|
||||||
current_value = current_value
|
|
||||||
.as_object_mut()
|
|
||||||
.ok_or_else(|| {
|
|
||||||
EmbedError::rest_not_an_object(
|
|
||||||
options.query.clone(),
|
|
||||||
options.input_field.clone(),
|
|
||||||
)
|
|
||||||
})?
|
|
||||||
.entry(component.clone())
|
|
||||||
.or_insert(serde_json::json!({}));
|
|
||||||
}
|
|
||||||
|
|
||||||
current_value.as_object_mut().unwrap().insert(input.clone(), input_value);
|
|
||||||
body
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
for attempt in 0..10 {
|
for attempt in 0..10 {
|
||||||
let response = request.clone().send_json(&body);
|
let response = request.clone().send_json(&body);
|
||||||
let result = check_response(response);
|
let result = check_response(response, data.configuration_source);
|
||||||
|
|
||||||
let retry_duration = match result {
|
let retry_duration = match result {
|
||||||
Ok(response) => return response_to_embedding(response, options, expected_count),
|
Ok(response) => {
|
||||||
|
return response_to_embedding(response, data, expected_count, expected_dimension)
|
||||||
|
}
|
||||||
Err(retry) => {
|
Err(retry) => {
|
||||||
tracing::warn!("Failed: {}", retry.error);
|
tracing::warn!("Failed: {}", retry.error);
|
||||||
retry.into_duration(attempt)
|
retry.into_duration(attempt)
|
||||||
@ -288,13 +252,16 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
let response = request.send_json(&body);
|
let response = request.send_json(&body);
|
||||||
let result = check_response(response);
|
let result = check_response(response, data.configuration_source);
|
||||||
result
|
result.map_err(Retry::into_error).and_then(|response| {
|
||||||
.map_err(Retry::into_error)
|
response_to_embedding(response, data, expected_count, expected_dimension)
|
||||||
.and_then(|response| response_to_embedding(response, options, expected_count))
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn check_response(response: Result<ureq::Response, ureq::Error>) -> Result<ureq::Response, Retry> {
|
fn check_response(
|
||||||
|
response: Result<ureq::Response, ureq::Error>,
|
||||||
|
configuration_source: ConfigurationSource,
|
||||||
|
) -> Result<ureq::Response, Retry> {
|
||||||
match response {
|
match response {
|
||||||
Ok(response) => Ok(response),
|
Ok(response) => Ok(response),
|
||||||
Err(ureq::Error::Status(code, response)) => {
|
Err(ureq::Error::Status(code, response)) => {
|
||||||
@ -302,7 +269,10 @@ fn check_response(response: Result<ureq::Response, ureq::Error>) -> Result<ureq:
|
|||||||
Err(match code {
|
Err(match code {
|
||||||
401 => Retry::give_up(EmbedError::rest_unauthorized(error_response)),
|
401 => Retry::give_up(EmbedError::rest_unauthorized(error_response)),
|
||||||
429 => Retry::rate_limited(EmbedError::rest_too_many_requests(error_response)),
|
429 => Retry::rate_limited(EmbedError::rest_too_many_requests(error_response)),
|
||||||
400 => Retry::give_up(EmbedError::rest_bad_request(error_response)),
|
400 => Retry::give_up(EmbedError::rest_bad_request(
|
||||||
|
error_response,
|
||||||
|
configuration_source,
|
||||||
|
)),
|
||||||
500..=599 => {
|
500..=599 => {
|
||||||
Retry::retry_later(EmbedError::rest_internal_server_error(code, error_response))
|
Retry::retry_later(EmbedError::rest_internal_server_error(code, error_response))
|
||||||
}
|
}
|
||||||
@ -320,68 +290,111 @@ fn check_response(response: Result<ureq::Response, ureq::Error>) -> Result<ureq:
|
|||||||
|
|
||||||
fn response_to_embedding(
|
fn response_to_embedding(
|
||||||
response: ureq::Response,
|
response: ureq::Response,
|
||||||
options: &EmbedderOptions,
|
data: &EmbedderData,
|
||||||
expected_count: usize,
|
expected_count: usize,
|
||||||
|
expected_dimensions: Option<usize>,
|
||||||
) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
||||||
let response: serde_json::Value =
|
let response: serde_json::Value =
|
||||||
response.into_json().map_err(EmbedError::rest_response_deserialization)?;
|
response.into_json().map_err(EmbedError::rest_response_deserialization)?;
|
||||||
|
|
||||||
let mut current_value = &response;
|
let embeddings = data.response.extract_embeddings(response)?;
|
||||||
for component in &options.path_to_embeddings {
|
|
||||||
let component = component.as_ref();
|
|
||||||
current_value = current_value.get(component).ok_or_else(|| {
|
|
||||||
EmbedError::rest_response_missing_embeddings(
|
|
||||||
response.clone(),
|
|
||||||
component,
|
|
||||||
&options.path_to_embeddings,
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let embeddings = match options.input_type {
|
|
||||||
InputType::Text => {
|
|
||||||
for component in &options.embedding_object {
|
|
||||||
current_value = current_value.get(component).ok_or_else(|| {
|
|
||||||
EmbedError::rest_response_missing_embeddings(
|
|
||||||
response.clone(),
|
|
||||||
component,
|
|
||||||
&options.embedding_object,
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
}
|
|
||||||
let embeddings = current_value.to_owned();
|
|
||||||
let embeddings: Embedding =
|
|
||||||
serde_json::from_value(embeddings).map_err(EmbedError::rest_response_format)?;
|
|
||||||
|
|
||||||
vec![Embeddings::from_single_embedding(embeddings)]
|
|
||||||
}
|
|
||||||
InputType::TextArray => {
|
|
||||||
let empty = vec![];
|
|
||||||
let values = current_value.as_array().unwrap_or(&empty);
|
|
||||||
let mut embeddings: Vec<Embeddings<f32>> = Vec::with_capacity(expected_count);
|
|
||||||
for value in values {
|
|
||||||
let mut current_value = value;
|
|
||||||
for component in &options.embedding_object {
|
|
||||||
current_value = current_value.get(component).ok_or_else(|| {
|
|
||||||
EmbedError::rest_response_missing_embeddings(
|
|
||||||
response.clone(),
|
|
||||||
component,
|
|
||||||
&options.embedding_object,
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
}
|
|
||||||
let embedding = current_value.to_owned();
|
|
||||||
let embedding: Embedding =
|
|
||||||
serde_json::from_value(embedding).map_err(EmbedError::rest_response_format)?;
|
|
||||||
embeddings.push(Embeddings::from_single_embedding(embedding));
|
|
||||||
}
|
|
||||||
embeddings
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if embeddings.len() != expected_count {
|
if embeddings.len() != expected_count {
|
||||||
return Err(EmbedError::rest_response_embedding_count(expected_count, embeddings.len()));
|
return Err(EmbedError::rest_response_embedding_count(expected_count, embeddings.len()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(dimensions) = expected_dimensions {
|
||||||
|
for embedding in &embeddings {
|
||||||
|
if embedding.dimension() != dimensions {
|
||||||
|
return Err(EmbedError::rest_unexpected_dimension(
|
||||||
|
dimensions,
|
||||||
|
embedding.dimension(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Ok(embeddings)
|
Ok(embeddings)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(super) const REQUEST_PLACEHOLDER: &str = "{{text}}";
|
||||||
|
pub(super) const RESPONSE_PLACEHOLDER: &str = "{{embedding}}";
|
||||||
|
pub(super) const REPEAT_PLACEHOLDER: &str = "{{..}}";
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Request {
|
||||||
|
template: ValueTemplate,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Request {
|
||||||
|
pub fn new(template: serde_json::Value) -> Result<Self, NewEmbedderError> {
|
||||||
|
let template = match ValueTemplate::new(template, REQUEST_PLACEHOLDER, REPEAT_PLACEHOLDER) {
|
||||||
|
Ok(template) => template,
|
||||||
|
Err(error) => {
|
||||||
|
let message =
|
||||||
|
error.error_message("request", REQUEST_PLACEHOLDER, REPEAT_PLACEHOLDER);
|
||||||
|
return Err(NewEmbedderError::rest_could_not_parse_template(message));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Self { template })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn input_type(&self) -> InputType {
|
||||||
|
if self.template.has_array_value() {
|
||||||
|
InputType::TextArray
|
||||||
|
} else {
|
||||||
|
InputType::Text
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn inject_texts<S: Serialize>(
|
||||||
|
&self,
|
||||||
|
texts: impl IntoIterator<Item = S>,
|
||||||
|
) -> serde_json::Value {
|
||||||
|
self.template.inject(texts.into_iter().map(|s| serde_json::json!(s))).unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Response {
|
||||||
|
template: ValueTemplate,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Response {
|
||||||
|
pub fn new(template: serde_json::Value, request: &Request) -> Result<Self, NewEmbedderError> {
|
||||||
|
let template = match ValueTemplate::new(template, RESPONSE_PLACEHOLDER, REPEAT_PLACEHOLDER)
|
||||||
|
{
|
||||||
|
Ok(template) => template,
|
||||||
|
Err(error) => {
|
||||||
|
let message =
|
||||||
|
error.error_message("response", RESPONSE_PLACEHOLDER, REPEAT_PLACEHOLDER);
|
||||||
|
return Err(NewEmbedderError::rest_could_not_parse_template(message));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
match (template.has_array_value(), request.template.has_array_value()) {
|
||||||
|
(true, true) | (false, false) => Ok(Self {template}),
|
||||||
|
(true, false) => Err(NewEmbedderError::rest_could_not_parse_template("in `response`: `response` has multiple embeddings, but `request` has only one text to embed".to_string())),
|
||||||
|
(false, true) => Err(NewEmbedderError::rest_could_not_parse_template("in `response`: `response` has a single embedding, but `request` has multiple texts to embed".to_string())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn extract_embeddings(
|
||||||
|
&self,
|
||||||
|
response: serde_json::Value,
|
||||||
|
) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
||||||
|
let extracted_values: Vec<Embedding> = match self.template.extract(response) {
|
||||||
|
Ok(extracted_values) => extracted_values,
|
||||||
|
Err(error) => {
|
||||||
|
let error_message =
|
||||||
|
error.error_message("response", "{{embedding}}", "an array of numbers");
|
||||||
|
return Err(EmbedError::rest_extraction_error(error_message));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let embeddings: Vec<Embeddings<f32>> =
|
||||||
|
extracted_values.into_iter().map(Embeddings::from_single_embedding).collect();
|
||||||
|
|
||||||
|
Ok(embeddings)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -2,7 +2,6 @@ use deserr::Deserr;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use super::rest::InputType;
|
|
||||||
use super::{ollama, openai, DistributionShift};
|
use super::{ollama, openai, DistributionShift};
|
||||||
use crate::prompt::PromptData;
|
use crate::prompt::PromptData;
|
||||||
use crate::update::Setting;
|
use crate::update::Setting;
|
||||||
@ -36,19 +35,10 @@ pub struct EmbeddingSettings {
|
|||||||
pub url: Setting<String>,
|
pub url: Setting<String>,
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
pub query: Setting<serde_json::Value>,
|
pub request: Setting<serde_json::Value>,
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
pub input_field: Setting<Vec<String>>,
|
pub response: Setting<serde_json::Value>,
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[deserr(default)]
|
|
||||||
pub path_to_embeddings: Setting<Vec<String>>,
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[deserr(default)]
|
|
||||||
pub embedding_object: Setting<Vec<String>>,
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[deserr(default)]
|
|
||||||
pub input_type: Setting<InputType>,
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
pub distribution: Setting<DistributionShift>,
|
pub distribution: Setting<DistributionShift>,
|
||||||
@ -112,11 +102,8 @@ impl SettingsDiff {
|
|||||||
mut dimensions,
|
mut dimensions,
|
||||||
mut document_template,
|
mut document_template,
|
||||||
mut url,
|
mut url,
|
||||||
mut query,
|
mut request,
|
||||||
mut input_field,
|
mut response,
|
||||||
mut path_to_embeddings,
|
|
||||||
mut embedding_object,
|
|
||||||
mut input_type,
|
|
||||||
mut distribution,
|
mut distribution,
|
||||||
} = old;
|
} = old;
|
||||||
|
|
||||||
@ -128,11 +115,8 @@ impl SettingsDiff {
|
|||||||
dimensions: new_dimensions,
|
dimensions: new_dimensions,
|
||||||
document_template: new_document_template,
|
document_template: new_document_template,
|
||||||
url: new_url,
|
url: new_url,
|
||||||
query: new_query,
|
request: new_request,
|
||||||
input_field: new_input_field,
|
response: new_response,
|
||||||
path_to_embeddings: new_path_to_embeddings,
|
|
||||||
embedding_object: new_embedding_object,
|
|
||||||
input_type: new_input_type,
|
|
||||||
distribution: new_distribution,
|
distribution: new_distribution,
|
||||||
} = new;
|
} = new;
|
||||||
|
|
||||||
@ -148,11 +132,8 @@ impl SettingsDiff {
|
|||||||
&mut revision,
|
&mut revision,
|
||||||
&mut dimensions,
|
&mut dimensions,
|
||||||
&mut url,
|
&mut url,
|
||||||
&mut query,
|
&mut request,
|
||||||
&mut input_field,
|
&mut response,
|
||||||
&mut path_to_embeddings,
|
|
||||||
&mut embedding_object,
|
|
||||||
&mut input_type,
|
|
||||||
&mut document_template,
|
&mut document_template,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -177,19 +158,10 @@ impl SettingsDiff {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if query.apply(new_query) {
|
if request.apply(new_request) {
|
||||||
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
||||||
}
|
}
|
||||||
if input_field.apply(new_input_field) {
|
if response.apply(new_response) {
|
||||||
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
|
||||||
}
|
|
||||||
if path_to_embeddings.apply(new_path_to_embeddings) {
|
|
||||||
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
|
||||||
}
|
|
||||||
if embedding_object.apply(new_embedding_object) {
|
|
||||||
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
|
||||||
}
|
|
||||||
if input_type.apply(new_input_type) {
|
|
||||||
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
||||||
}
|
}
|
||||||
if document_template.apply(new_document_template) {
|
if document_template.apply(new_document_template) {
|
||||||
@ -210,11 +182,8 @@ impl SettingsDiff {
|
|||||||
dimensions,
|
dimensions,
|
||||||
document_template,
|
document_template,
|
||||||
url,
|
url,
|
||||||
query,
|
request,
|
||||||
input_field,
|
response,
|
||||||
path_to_embeddings,
|
|
||||||
embedding_object,
|
|
||||||
input_type,
|
|
||||||
distribution,
|
distribution,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -246,11 +215,8 @@ fn apply_default_for_source(
|
|||||||
revision: &mut Setting<String>,
|
revision: &mut Setting<String>,
|
||||||
dimensions: &mut Setting<usize>,
|
dimensions: &mut Setting<usize>,
|
||||||
url: &mut Setting<String>,
|
url: &mut Setting<String>,
|
||||||
query: &mut Setting<serde_json::Value>,
|
request: &mut Setting<serde_json::Value>,
|
||||||
input_field: &mut Setting<Vec<String>>,
|
response: &mut Setting<serde_json::Value>,
|
||||||
path_to_embeddings: &mut Setting<Vec<String>>,
|
|
||||||
embedding_object: &mut Setting<Vec<String>>,
|
|
||||||
input_type: &mut Setting<InputType>,
|
|
||||||
document_template: &mut Setting<String>,
|
document_template: &mut Setting<String>,
|
||||||
) {
|
) {
|
||||||
match source {
|
match source {
|
||||||
@ -259,55 +225,40 @@ fn apply_default_for_source(
|
|||||||
*revision = Setting::Reset;
|
*revision = Setting::Reset;
|
||||||
*dimensions = Setting::NotSet;
|
*dimensions = Setting::NotSet;
|
||||||
*url = Setting::NotSet;
|
*url = Setting::NotSet;
|
||||||
*query = Setting::NotSet;
|
*request = Setting::NotSet;
|
||||||
*input_field = Setting::NotSet;
|
*response = Setting::NotSet;
|
||||||
*path_to_embeddings = Setting::NotSet;
|
|
||||||
*embedding_object = Setting::NotSet;
|
|
||||||
*input_type = Setting::NotSet;
|
|
||||||
}
|
}
|
||||||
Setting::Set(EmbedderSource::Ollama) => {
|
Setting::Set(EmbedderSource::Ollama) => {
|
||||||
*model = Setting::Reset;
|
*model = Setting::Reset;
|
||||||
*revision = Setting::NotSet;
|
*revision = Setting::NotSet;
|
||||||
*dimensions = Setting::Reset;
|
*dimensions = Setting::Reset;
|
||||||
*url = Setting::NotSet;
|
*url = Setting::NotSet;
|
||||||
*query = Setting::NotSet;
|
*request = Setting::NotSet;
|
||||||
*input_field = Setting::NotSet;
|
*response = Setting::NotSet;
|
||||||
*path_to_embeddings = Setting::NotSet;
|
|
||||||
*embedding_object = Setting::NotSet;
|
|
||||||
*input_type = Setting::NotSet;
|
|
||||||
}
|
}
|
||||||
Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => {
|
Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => {
|
||||||
*model = Setting::Reset;
|
*model = Setting::Reset;
|
||||||
*revision = Setting::NotSet;
|
*revision = Setting::NotSet;
|
||||||
*dimensions = Setting::NotSet;
|
*dimensions = Setting::NotSet;
|
||||||
*url = Setting::Reset;
|
*url = Setting::Reset;
|
||||||
*query = Setting::NotSet;
|
*request = Setting::NotSet;
|
||||||
*input_field = Setting::NotSet;
|
*response = Setting::NotSet;
|
||||||
*path_to_embeddings = Setting::NotSet;
|
|
||||||
*embedding_object = Setting::NotSet;
|
|
||||||
*input_type = Setting::NotSet;
|
|
||||||
}
|
}
|
||||||
Setting::Set(EmbedderSource::Rest) => {
|
Setting::Set(EmbedderSource::Rest) => {
|
||||||
*model = Setting::NotSet;
|
*model = Setting::NotSet;
|
||||||
*revision = Setting::NotSet;
|
*revision = Setting::NotSet;
|
||||||
*dimensions = Setting::Reset;
|
*dimensions = Setting::Reset;
|
||||||
*url = Setting::Reset;
|
*url = Setting::Reset;
|
||||||
*query = Setting::Reset;
|
*request = Setting::Reset;
|
||||||
*input_field = Setting::Reset;
|
*response = Setting::Reset;
|
||||||
*path_to_embeddings = Setting::Reset;
|
|
||||||
*embedding_object = Setting::Reset;
|
|
||||||
*input_type = Setting::Reset;
|
|
||||||
}
|
}
|
||||||
Setting::Set(EmbedderSource::UserProvided) => {
|
Setting::Set(EmbedderSource::UserProvided) => {
|
||||||
*model = Setting::NotSet;
|
*model = Setting::NotSet;
|
||||||
*revision = Setting::NotSet;
|
*revision = Setting::NotSet;
|
||||||
*dimensions = Setting::Reset;
|
*dimensions = Setting::Reset;
|
||||||
*url = Setting::NotSet;
|
*url = Setting::NotSet;
|
||||||
*query = Setting::NotSet;
|
*request = Setting::NotSet;
|
||||||
*input_field = Setting::NotSet;
|
*response = Setting::NotSet;
|
||||||
*path_to_embeddings = Setting::NotSet;
|
|
||||||
*embedding_object = Setting::NotSet;
|
|
||||||
*input_type = Setting::NotSet;
|
|
||||||
*document_template = Setting::NotSet;
|
*document_template = Setting::NotSet;
|
||||||
}
|
}
|
||||||
Setting::NotSet => {}
|
Setting::NotSet => {}
|
||||||
@ -340,11 +291,8 @@ impl EmbeddingSettings {
|
|||||||
pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate";
|
pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate";
|
||||||
|
|
||||||
pub const URL: &'static str = "url";
|
pub const URL: &'static str = "url";
|
||||||
pub const QUERY: &'static str = "query";
|
pub const REQUEST: &'static str = "request";
|
||||||
pub const INPUT_FIELD: &'static str = "inputField";
|
pub const RESPONSE: &'static str = "response";
|
||||||
pub const PATH_TO_EMBEDDINGS: &'static str = "pathToEmbeddings";
|
|
||||||
pub const EMBEDDING_OBJECT: &'static str = "embeddingObject";
|
|
||||||
pub const INPUT_TYPE: &'static str = "inputType";
|
|
||||||
|
|
||||||
pub const DISTRIBUTION: &'static str = "distribution";
|
pub const DISTRIBUTION: &'static str = "distribution";
|
||||||
|
|
||||||
@ -374,11 +322,8 @@ impl EmbeddingSettings {
|
|||||||
EmbedderSource::Rest,
|
EmbedderSource::Rest,
|
||||||
],
|
],
|
||||||
Self::URL => &[EmbedderSource::Ollama, EmbedderSource::Rest, EmbedderSource::OpenAi],
|
Self::URL => &[EmbedderSource::Ollama, EmbedderSource::Rest, EmbedderSource::OpenAi],
|
||||||
Self::QUERY => &[EmbedderSource::Rest],
|
Self::REQUEST => &[EmbedderSource::Rest],
|
||||||
Self::INPUT_FIELD => &[EmbedderSource::Rest],
|
Self::RESPONSE => &[EmbedderSource::Rest],
|
||||||
Self::PATH_TO_EMBEDDINGS => &[EmbedderSource::Rest],
|
|
||||||
Self::EMBEDDING_OBJECT => &[EmbedderSource::Rest],
|
|
||||||
Self::INPUT_TYPE => &[EmbedderSource::Rest],
|
|
||||||
Self::DISTRIBUTION => &[
|
Self::DISTRIBUTION => &[
|
||||||
EmbedderSource::HuggingFace,
|
EmbedderSource::HuggingFace,
|
||||||
EmbedderSource::Ollama,
|
EmbedderSource::Ollama,
|
||||||
@ -423,11 +368,8 @@ impl EmbeddingSettings {
|
|||||||
Self::DIMENSIONS,
|
Self::DIMENSIONS,
|
||||||
Self::DOCUMENT_TEMPLATE,
|
Self::DOCUMENT_TEMPLATE,
|
||||||
Self::URL,
|
Self::URL,
|
||||||
Self::QUERY,
|
Self::REQUEST,
|
||||||
Self::INPUT_FIELD,
|
Self::RESPONSE,
|
||||||
Self::PATH_TO_EMBEDDINGS,
|
|
||||||
Self::EMBEDDING_OBJECT,
|
|
||||||
Self::INPUT_TYPE,
|
|
||||||
Self::DISTRIBUTION,
|
Self::DISTRIBUTION,
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
@ -496,11 +438,8 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
dimensions: Setting::NotSet,
|
dimensions: Setting::NotSet,
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
url: Setting::NotSet,
|
url: Setting::NotSet,
|
||||||
query: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
input_field: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
path_to_embeddings: Setting::NotSet,
|
|
||||||
embedding_object: Setting::NotSet,
|
|
||||||
input_type: Setting::NotSet,
|
|
||||||
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
|
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
|
||||||
@ -517,11 +456,8 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
dimensions: dimensions.map(Setting::Set).unwrap_or_default(),
|
dimensions: dimensions.map(Setting::Set).unwrap_or_default(),
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
url: url.map(Setting::Set).unwrap_or_default(),
|
url: url.map(Setting::Set).unwrap_or_default(),
|
||||||
query: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
input_field: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
path_to_embeddings: Setting::NotSet,
|
|
||||||
embedding_object: Setting::NotSet,
|
|
||||||
input_type: Setting::NotSet,
|
|
||||||
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
|
super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
|
||||||
@ -537,11 +473,8 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
dimensions: Setting::NotSet,
|
dimensions: Setting::NotSet,
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
url: url.map(Setting::Set).unwrap_or_default(),
|
url: url.map(Setting::Set).unwrap_or_default(),
|
||||||
query: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
input_field: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
path_to_embeddings: Setting::NotSet,
|
|
||||||
embedding_object: Setting::NotSet,
|
|
||||||
input_type: Setting::NotSet,
|
|
||||||
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
|
super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
|
||||||
@ -555,22 +488,16 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
dimensions: Setting::Set(dimensions),
|
dimensions: Setting::Set(dimensions),
|
||||||
document_template: Setting::NotSet,
|
document_template: Setting::NotSet,
|
||||||
url: Setting::NotSet,
|
url: Setting::NotSet,
|
||||||
query: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
input_field: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
path_to_embeddings: Setting::NotSet,
|
|
||||||
embedding_object: Setting::NotSet,
|
|
||||||
input_type: Setting::NotSet,
|
|
||||||
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
|
super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
|
||||||
api_key,
|
api_key,
|
||||||
dimensions,
|
dimensions,
|
||||||
url,
|
url,
|
||||||
query,
|
request,
|
||||||
input_field,
|
response,
|
||||||
path_to_embeddings,
|
|
||||||
embedding_object,
|
|
||||||
input_type,
|
|
||||||
distribution,
|
distribution,
|
||||||
}) => Self {
|
}) => Self {
|
||||||
source: Setting::Set(EmbedderSource::Rest),
|
source: Setting::Set(EmbedderSource::Rest),
|
||||||
@ -580,11 +507,8 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
dimensions: dimensions.map(Setting::Set).unwrap_or_default(),
|
dimensions: dimensions.map(Setting::Set).unwrap_or_default(),
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
url: Setting::Set(url),
|
url: Setting::Set(url),
|
||||||
query: Setting::Set(query),
|
request: Setting::Set(request),
|
||||||
input_field: Setting::Set(input_field),
|
response: Setting::Set(response),
|
||||||
path_to_embeddings: Setting::Set(path_to_embeddings),
|
|
||||||
embedding_object: Setting::Set(embedding_object),
|
|
||||||
input_type: Setting::Set(input_type),
|
|
||||||
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@ -602,11 +526,8 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
|
|||||||
dimensions,
|
dimensions,
|
||||||
document_template,
|
document_template,
|
||||||
url,
|
url,
|
||||||
query,
|
request,
|
||||||
input_field,
|
response,
|
||||||
path_to_embeddings,
|
|
||||||
embedding_object,
|
|
||||||
input_type,
|
|
||||||
distribution,
|
distribution,
|
||||||
} = value;
|
} = value;
|
||||||
|
|
||||||
@ -669,22 +590,13 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
EmbedderSource::Rest => {
|
EmbedderSource::Rest => {
|
||||||
let embedder_options = super::rest::EmbedderOptions::default();
|
|
||||||
|
|
||||||
this.embedder_options =
|
this.embedder_options =
|
||||||
super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
|
super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
|
||||||
api_key: api_key.set(),
|
api_key: api_key.set(),
|
||||||
dimensions: dimensions.set(),
|
dimensions: dimensions.set(),
|
||||||
url: url.set().unwrap(),
|
url: url.set().unwrap(),
|
||||||
query: query.set().unwrap_or(embedder_options.query),
|
request: request.set().unwrap(),
|
||||||
input_field: input_field.set().unwrap_or(embedder_options.input_field),
|
response: response.set().unwrap(),
|
||||||
path_to_embeddings: path_to_embeddings
|
|
||||||
.set()
|
|
||||||
.unwrap_or(embedder_options.path_to_embeddings),
|
|
||||||
embedding_object: embedding_object
|
|
||||||
.set()
|
|
||||||
.unwrap_or(embedder_options.embedding_object),
|
|
||||||
input_type: input_type.set().unwrap_or(embedder_options.input_type),
|
|
||||||
distribution: distribution.set(),
|
distribution: distribution.set(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user